1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 #include "esclex.h" 58 59 /* imported from eft.c... 
 */
extern char *Autoclose;
extern int Dupclose;
extern hrtime_t Hesitate;
extern char *Serd_Override;
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/* nonzero when istat/serd state has changed and should be checkpointed */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/* UD_* literal naming why the current case is undiagnosable, if set */
static const char *Undiag_reason;

/* next FME id to hand out; advanced by newfme() and fme_restart() */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */

/* list of fault management exercises underway */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (names/descriptions established in fme_ready()) */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;	/* active head, active tail, closed */

/* cases we were unable to restart at module initialization */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;

static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node
*pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
    fmd_case_t *fmcase);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);

/*
 * alloc_fme -- allocate a new struct fme, zeroed so every list pointer
 *	and counter starts out NULL/0.  Released with FREE().
 */
static struct fme *
alloc_fme(void)
{
	struct fme *fmep;

	fmep = MALLOC(sizeof (*fmep));
	bzero(fmep, sizeof (*fmep));
	return (fmep);
}

/*
 * fme_ready -- called when all initialization of the FME (except for
 *	stats) has completed successfully.  Adds the fme to global lists
 *	and establishes its stats.
179 */ 180 static struct fme * 181 fme_ready(struct fme *fmep) 182 { 183 char nbuf[100]; 184 185 Nfmep = NULL; /* don't need to free this on module abort now */ 186 187 if (EFMElist) { 188 EFMElist->next = fmep; 189 EFMElist = fmep; 190 } else 191 FMElist = EFMElist = fmep; 192 193 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 194 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 195 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 196 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 197 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 198 fmep->Rcallcount = stats_new_counter(nbuf, 199 "calls to requirements_test()", 1); 200 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 201 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 202 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 203 fmep->Ecallcount = 204 stats_new_counter(nbuf, "calls to effects_test()", 1); 205 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 206 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 207 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 208 fmep->Marrowcount = stats_new_counter(nbuf, 209 "arrows marked by mark_arrows()", 1); 210 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 211 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 212 213 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 214 config_print(O_ALTFP|O_VERB2, fmep->config); 215 216 return (fmep); 217 } 218 219 extern void ipath_dummy_lut(struct arrow *); 220 extern struct lut *itree_create_dummy(const char *, const struct ipath *); 221 222 /* ARGSUSED */ 223 static void 224 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 225 { 226 struct bubble *bp; 227 struct arrowlist *ap; 228 229 for (bp = itree_next_bubble(ep, NULL); bp; 230 bp = itree_next_bubble(ep, bp)) { 231 if (bp->t != B_FROM) 232 continue; 233 for (ap = itree_next_arrow(bp, NULL); ap; 234 ap = itree_next_arrow(bp, ap)) { 235 
ap->arrowp->pnode->u.arrow.needed = 1; 236 ipath_dummy_lut(ap->arrowp); 237 } 238 } 239 } 240 241 /* ARGSUSED */ 242 static void 243 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 244 { 245 struct bubble *bp; 246 struct arrowlist *ap; 247 248 for (bp = itree_next_bubble(ep, NULL); bp; 249 bp = itree_next_bubble(ep, bp)) { 250 if (bp->t != B_FROM) 251 continue; 252 for (ap = itree_next_arrow(bp, NULL); ap; 253 ap = itree_next_arrow(bp, ap)) 254 ap->arrowp->pnode->u.arrow.needed = 0; 255 } 256 } 257 258 static void globals_destructor(void *left, void *right, void *arg); 259 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep); 260 261 static void 262 prune_propagations(const char *e0class, const struct ipath *e0ipp) 263 { 264 char nbuf[100]; 265 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 266 extern struct lut *Usednames; 267 268 Nfmep = alloc_fme(); 269 Nfmep->id = Nextid; 270 Nfmep->state = FME_NOTHING; 271 Nfmep->eventtree = itree_create_dummy(e0class, e0ipp); 272 if ((Nfmep->e0 = 273 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 274 out(O_ALTFP, "prune_propagations: e0 not in instance tree"); 275 itree_free(Nfmep->eventtree); 276 FREE(Nfmep); 277 Nfmep = NULL; 278 return; 279 } 280 Nfmep->ecurrent = Nfmep->observations = Nfmep->e0; 281 Nfmep->e0->count++; 282 283 (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id); 284 Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 285 (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id); 286 Nfmep->Hcallcount = 287 stats_new_counter(nbuf, "calls to hypothesise()", 1); 288 (void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id); 289 Nfmep->Rcallcount = stats_new_counter(nbuf, 290 "calls to requirements_test()", 1); 291 (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id); 292 Nfmep->Ccallcount = 293 stats_new_counter(nbuf, "calls to causes_test()", 1); 294 (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id); 295 Nfmep->Ecallcount = 296 stats_new_counter(nbuf, "calls to 
effects_test()", 1); 297 (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id); 298 Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 299 (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id); 300 Nfmep->Marrowcount = stats_new_counter(nbuf, 301 "arrows marked by mark_arrows()", 1); 302 (void) sprintf(nbuf, "fme%d.diags", Nfmep->id); 303 Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 304 305 Nfmep->peek = 1; 306 lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep); 307 lut_free(Usednames, NULL, NULL); 308 Usednames = NULL; 309 lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep); 310 (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay); 311 itree_prune(Nfmep->eventtree); 312 lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep); 313 314 stats_delete(Nfmep->Rcount); 315 stats_delete(Nfmep->Hcallcount); 316 stats_delete(Nfmep->Rcallcount); 317 stats_delete(Nfmep->Ccallcount); 318 stats_delete(Nfmep->Ecallcount); 319 stats_delete(Nfmep->Tcallcount); 320 stats_delete(Nfmep->Marrowcount); 321 stats_delete(Nfmep->diags); 322 itree_free(Nfmep->eventtree); 323 lut_free(Nfmep->globals, globals_destructor, NULL); 324 FREE(Nfmep); 325 } 326 327 static struct fme * 328 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl, 329 fmd_case_t *fmcase) 330 { 331 struct cfgdata *cfgdata; 332 int init_size; 333 extern int alloc_total(); 334 335 init_size = alloc_total(); 336 out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size); 337 if ((cfgdata = config_snapshot()) == NULL) { 338 out(O_ALTFP, "newfme: NULL configuration"); 339 Undiag_reason = UD_NOCONF; 340 return (NULL); 341 } 342 platform_save_config(hdl, fmcase); 343 out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes", 344 alloc_total() - init_size); 345 346 Nfmep = alloc_fme(); 347 348 Nfmep->id = Nextid++; 349 Nfmep->config = cfgdata->cooked; 350 config_free(cfgdata); 351 Nfmep->posted_suspects = 0; 352 
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		out(O_ALTFP, "newfme: NULL instance tree");
		Undiag_reason = UD_INSTFAIL;
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	/* the initial ereport must name an event in the instance tree */
	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		out(O_ALTFP, "newfme: e0 not in instance tree");
		Undiag_reason = UD_BADEVENTI;
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	return (fme_ready(Nfmep));
}

/*
 * fme_fini -- module teardown.  Frees the undiagnosable-case list and
 *	every closed, active, and under-construction FME.
 */
void
fme_fini(void)
{
	struct fme *sfp, *fp;
	struct case_list *ucasep, *nextcasep;

	ucasep = Undiagablecaselist;
	while (ucasep != NULL) {
		nextcasep = ucasep->next;
		FREE(ucasep);
		ucasep = nextcasep;
	}
	Undiagablecaselist = NULL;

	/* clean up closed fmes */
	fp = ClosedFMEs;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	ClosedFMEs = NULL;

	/* then the active list */
	fp = FMElist;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	FMElist = EFMElist = NULL;

	/* if we were in the middle of creating an fme, free it now */
	if (Nfmep) {
		destroy_fme(Nfmep);
		Nfmep = NULL;
	}
}

/*
 * Allocated space for a buffer name.  20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20

/*
 * serialize_observation
 *
 * Create a recoverable version of the current observation
 * (f->ecurrent).
We keep a serialized version of each unique
 * observation in order that we may resume correctly the fme in the
 * correct state if eft or fmd crashes and we're restarted.
 */
static void
serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
{
	size_t pkdlen;
	char tmpbuf[OBBUFNMSZ];
	char *pkd = NULL;
	char *estr;

	/* save the "class@path" string under case buffer "observed<n>" */
	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
	estr = ipath2str(cls, ipp);
	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
	    strlen(estr) + 1);
	FREE(estr);

	/* if the ereport carries an nvlist, save it as "observed<n>.nvp" */
	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
		(void) snprintf(tmpbuf,
		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
		if (nvlist_xpack(fp->ecurrent->nvp,
		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
			out(O_DIE|O_SYS, "pack of observed nvl failed");
		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
		FREE(pkd);
	}

	/* persist the updated observation count */
	fp->uniqobs++;
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));
}

/*
 * init_fme_bufs -- We keep several bits of state about an fme for
 *	use if eft or fmd crashes and we're restarted.
 */
static void
init_fme_bufs(struct fme *fp)
{
	/* time passed since created (fp->pull) */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
	    sizeof (fp->pull));

	/* the FME id */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
	    sizeof (fp->id));

	/* count of unique observations serialized so far */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));

	/* whether we've already posted a diagnosis */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    sizeof (fp->posted_suspects));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
}

/*
 * destroy_fme_bufs -- discard all case buffers created by
 *	init_fme_bufs()/serialize_observation() plus the saved config.
 */
static void
destroy_fme_bufs(struct fme *fp)
{
	char tmpbuf[OBBUFNMSZ];
	int o;

	platform_restore_config(fp->hdl, fp->fmcase);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);

	/* one "observed%d" (and possibly "observed%d.nvp") per observation */
	for (o = 0; o < fp->uniqobs; o++) {
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
	}
}

/*
 * reconstitute_observations -- convert a case's serialized observations
 *	back into struct events.  Returns zero if all observations are
 *	successfully reconstituted.
 */
static int
reconstitute_observations(struct fme *fmep)
{
	struct event *ep;
	struct node *epnamenp = NULL;
	size_t pkdlen;
	char *pkd = NULL;
	char *tmpbuf = alloca(OBBUFNMSZ);
	char *sepptr;
	char *estr;
	int ocnt;
	int elen;

	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
		/* each observation was saved as "observed<n>" = class@path */
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
		if (elen == 0) {
			out(O_ALTFP,
			    "reconstitute_observation: no %s buffer found.",
			    tmpbuf);
			Undiag_reason = UD_MISSINGOBS;
			break;
		}

		estr = MALLOC(elen);
		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
		sepptr = strchr(estr, '@');
		if (sepptr == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "missing @ separator in %s.",
			    tmpbuf, estr);
			Undiag_reason = UD_MISSINGPATH;
			FREE(estr);
			break;
		}

		/* split into class (estr) and path (sepptr + 1) */
		*sepptr = '\0';
		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "trouble converting path string \"%s\" "
			    "to internal representation.",
			    tmpbuf, sepptr + 1);
			Undiag_reason = UD_MISSINGPATH;
			FREE(estr);
			break;
		}

		/* construct the event */
		ep = itree_lookup(fmep->eventtree,
		    stable(estr), ipath(epnamenp));
		if (ep == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "lookup of \"%s\" in itree failed.",
			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
			Undiag_reason = UD_BADOBS;
			tree_free(epnamenp);
			FREE(estr);
			break;
		}
		tree_free(epnamenp);

		/*
		 * We may or may not have a saved nvlist for the observation
		 */
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
		if (pkdlen != 0) {
			pkd = MALLOC(pkdlen);
			fmd_buf_read(fmep->hdl,
			    fmep->fmcase, tmpbuf, pkd, pkdlen);
			ASSERT(ep->nvp == NULL);
			if
(nvlist_xunpack(pkd, 597 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 598 out(O_DIE|O_SYS, "pack of observed nvl failed"); 599 FREE(pkd); 600 } 601 602 if (ocnt == 0) 603 fmep->e0 = ep; 604 605 FREE(estr); 606 fmep->ecurrent = ep; 607 ep->count++; 608 609 /* link it into list of observations seen */ 610 ep->observations = fmep->observations; 611 fmep->observations = ep; 612 } 613 614 if (ocnt == fmep->uniqobs) { 615 (void) fme_ready(fmep); 616 return (0); 617 } 618 619 return (1); 620 } 621 622 /* 623 * restart_fme -- called during eft initialization. Reconstitutes 624 * an in-progress fme. 625 */ 626 void 627 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 628 { 629 nvlist_t *defect; 630 struct case_list *bad; 631 struct fme *fmep; 632 struct cfgdata *cfgdata; 633 size_t rawsz; 634 struct event *ep; 635 char *tmpbuf = alloca(OBBUFNMSZ); 636 char *sepptr; 637 char *estr; 638 int elen; 639 struct node *epnamenp = NULL; 640 int init_size; 641 extern int alloc_total(); 642 643 /* 644 * ignore solved or closed cases 645 */ 646 if (fmd_case_solved(hdl, inprogress) || 647 fmd_case_closed(hdl, inprogress)) 648 return; 649 650 fmep = alloc_fme(); 651 fmep->fmcase = inprogress; 652 fmep->hdl = hdl; 653 654 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 655 out(O_ALTFP, "restart_fme: no saved posted status"); 656 Undiag_reason = UD_MISSINGINFO; 657 goto badcase; 658 } else { 659 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 660 (void *)&fmep->posted_suspects, 661 sizeof (fmep->posted_suspects)); 662 } 663 664 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 665 out(O_ALTFP, "restart_fme: no saved id"); 666 Undiag_reason = UD_MISSINGINFO; 667 goto badcase; 668 } else { 669 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 670 sizeof (fmep->id)); 671 } 672 if (Nextid <= fmep->id) 673 Nextid = fmep->id + 1; 674 675 out(O_ALTFP, "Replay FME %d", fmep->id); 676 677 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 678 out(O_ALTFP, "restart_fme: No 
config data");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	}
	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
	    sizeof (size_t));

	/* the case's principal ereport handle is our replay event zero */
	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
		out(O_ALTFP, "restart_fme: No event zero");
		Undiag_reason = UD_MISSINGZERO;
		goto badcase;
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
		out(O_ALTFP, "restart_fme: no saved wait time");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
		    sizeof (fmep->pull));
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
		out(O_ALTFP, "restart_fme: no count of observations");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
	}

	/*
	 * read observation zero up front so the propagation tree can be
	 * pruned (prune_propagations) before the config is restored
	 */
	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
	if (elen == 0) {
		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
		    tmpbuf);
		Undiag_reason = UD_MISSINGOBS;
		goto badcase;
	}
	estr = MALLOC(elen);
	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
	sepptr = strchr(estr, '@');
	if (sepptr == NULL) {
		out(O_ALTFP, "reconstitute_observation: %s: "
		    "missing @ separator in %s.",
		    tmpbuf, estr);
		Undiag_reason = UD_MISSINGPATH;
		FREE(estr);
		goto badcase;
	}
	*sepptr = '\0';
	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
		out(O_ALTFP, "reconstitute_observation: %s: "
		    "trouble converting path string \"%s\" "
		    "to internal representation.", tmpbuf, sepptr + 1);
		Undiag_reason = UD_MISSINGPATH;
		FREE(estr);
		goto badcase;
	}
	prune_propagations(stable(estr), ipath(epnamenp));
	tree_free(epnamenp);
	FREE(estr);

	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start 
config_restore using %d bytes", init_size); 743 cfgdata = MALLOC(sizeof (struct cfgdata)); 744 cfgdata->cooked = NULL; 745 cfgdata->devcache = NULL; 746 cfgdata->cpucache = NULL; 747 cfgdata->raw_refcnt = 1; 748 749 if (rawsz > 0) { 750 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 751 out(O_ALTFP, "restart_fme: Config data size mismatch"); 752 Undiag_reason = UD_CFGMISMATCH; 753 goto badcase; 754 } 755 cfgdata->begin = MALLOC(rawsz); 756 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 757 fmd_buf_read(hdl, 758 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 759 } else { 760 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 761 } 762 763 config_cook(cfgdata); 764 fmep->config = cfgdata->cooked; 765 config_free(cfgdata); 766 out(O_ALTFP|O_STAMP, "config_restore added %d bytes", 767 alloc_total() - init_size); 768 769 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) { 770 /* case not properly saved or irretrievable */ 771 out(O_ALTFP, "restart_fme: NULL instance tree"); 772 Undiag_reason = UD_INSTFAIL; 773 goto badcase; 774 } 775 776 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 777 778 if (reconstitute_observations(fmep) != 0) 779 goto badcase; 780 781 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id); 782 for (ep = fmep->observations; ep; ep = ep->observations) { 783 out(O_ALTFP|O_NONL, " "); 784 itree_pevent_brief(O_ALTFP|O_NONL, ep); 785 } 786 out(O_ALTFP, NULL); 787 788 Open_fme_count++; 789 790 /* give the diagnosis algorithm a shot at the new FME state */ 791 fme_eval(fmep, fmep->e0r); 792 return; 793 794 badcase: 795 if (fmep->eventtree != NULL) 796 itree_free(fmep->eventtree); 797 if (fmep->config) 798 structconfig_free(fmep->config); 799 destroy_fme_bufs(fmep); 800 FREE(fmep); 801 802 /* 803 * Since we're unable to restart the case, add it to the undiagable 804 * list and solve and close it as appropriate. 
 */
	bad = MALLOC(sizeof (struct case_list));
	bad->next = NULL;

	if (Undiagablecaselist != NULL)
		bad->next = Undiagablecaselist;
	Undiagablecaselist = bad;
	bad->fmcase = inprogress;

	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
	    fmd_case_uuid(hdl, bad->fmcase));

	if (fmd_case_solved(hdl, bad->fmcase)) {
		out(O_ALTFP|O_NONL, "already solved, ");
	} else {
		/* solve with an undiagnosable defect, noting why if known */
		out(O_ALTFP|O_NONL, "solving, ");
		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
		    NULL, NULL, NULL);
		if (Undiag_reason != NULL)
			(void) nvlist_add_string(defect,
			    UNDIAG_REASON, Undiag_reason);
		fmd_case_add_suspect(hdl, bad->fmcase, defect);
		fmd_case_solve(hdl, bad->fmcase);
	}

	if (fmd_case_closed(hdl, bad->fmcase)) {
		out(O_ALTFP, "already closed ]");
	} else {
		out(O_ALTFP, "closing ]");
		fmd_case_close(hdl, bad->fmcase);
	}
}

/*ARGSUSED*/
/* lut destructor for fme->globals entries (values are struct evalue) */
static void
globals_destructor(void *left, void *right, void *arg)
{
	struct evalue *evp = (struct evalue *)right;
	if (evp->t == NODEPTR)
		tree_free((struct node *)(uintptr_t)evp->v);
	evp->v = NULL;
	FREE(evp);
}

/*
 * destroy_fme -- release everything an FME owns: its stats, instance
 *	tree, cooked config, and globals table, then the struct itself.
 */
void
destroy_fme(struct fme *f)
{
	stats_delete(f->Rcount);
	stats_delete(f->Hcallcount);
	stats_delete(f->Rcallcount);
	stats_delete(f->Ccallcount);
	stats_delete(f->Ecallcount);
	stats_delete(f->Tcallcount);
	stats_delete(f->Marrowcount);
	stats_delete(f->diags);

	if (f->eventtree != NULL)
		itree_free(f->eventtree);
	if (f->config)
		structconfig_free(f->config);
	lut_free(f->globals, globals_destructor, NULL);
	FREE(f);
}

/* printable name for an fme_state value */
static const char *
fme_state2str(enum fme_state s)
{
	switch (s) {
	case FME_NOTHING:	return ("NOTHING");
	case FME_WAIT:		return ("WAIT");
	case FME_CREDIBLE:	return ("CREDIBLE");
	case FME_DISPROVED:	return ("DISPROVED");
	case FME_DEFERRED:	return ("DEFERRED");
	default:
		return ("UNKNOWN");
	}
}

/* true for event types that may appear on a suspect list */
static int
is_problem(enum nametype t)
{
	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
}

static int
is_fault(enum nametype t)
{
	return (t == N_FAULT);
}

static int
is_defect(enum nametype t)
{
	return (t == N_DEFECT);
}

static int
is_upset(enum nametype t)
{
	return (t == N_UPSET);
}

/* dump an FME's state, observations, and suspect list via out(flags, ...) */
static void
fme_print(int flags, struct fme *fmep)
{
	struct event *ep;

	out(flags, "Fault Management Exercise %d", fmep->id);
	out(flags, "\t State: %s", fme_state2str(fmep->state));
	out(flags|O_NONL, "\t Start time: ");
	ptree_timeval(flags|O_NONL, &fmep->ull);
	out(flags, NULL);
	if (fmep->wull) {
		out(flags|O_NONL, "\t Wait time: ");
		ptree_timeval(flags|O_NONL, &fmep->wull);
		out(flags, NULL);
	}
	out(flags|O_NONL, "\t E0: ");
	if (fmep->e0)
		itree_pevent_brief(flags|O_NONL, fmep->e0);
	else
		out(flags|O_NONL, "NULL");
	out(flags, NULL);
	out(flags|O_NONL, "\tObservations:");
	for (ep = fmep->observations; ep; ep = ep->observations) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	out(flags|O_NONL, "\tSuspect list:");
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	if (fmep->eventtree != NULL) {
		out(flags|O_VERB2, "\t Tree:");
		itree_ptree(flags|O_VERB2, fmep->eventtree);
	}
}

/*
 * pathstring2epnamenp -- convert a "comp/comp/..." path string into a
 *	chain of iname nodes.  NOTE: uses strtok(), so the input string
 *	is modified in place (callers pass heap copies they own).
 */
static struct node *
pathstring2epnamenp(char *path)
{
	char *sep = "/";
	struct node *ret;
	char *ptr;

	if ((ptr = strtok(path, sep)) == NULL)
		out(O_DIE, "pathstring2epnamenp: invalid empty class");

	ret = tree_iname(stable(ptr), NULL, 0);

	while ((ptr = strtok(NULL, sep)) != NULL)
		ret = tree_name_append(ret,
		    tree_iname(stable(ptr), NULL, 0));

	return (ret);
962 } 963 964 /* 965 * for a given upset sp, increment the corresponding SERD engine. if the 966 * SERD engine trips, return the ename and ipp of the resulting ereport. 967 * returns true if engine tripped and *enamep and *ippp were filled in. 968 */ 969 static int 970 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 971 fmd_case_t *fmcase, struct event *sp, const char **enamep, 972 const struct ipath **ippp) 973 { 974 struct node *serdinst; 975 char *serdname; 976 struct node *nid; 977 struct serd_entry *newentp; 978 979 ASSERT(sp->t == N_UPSET); 980 ASSERT(ffep != NULL); 981 982 /* 983 * obtain instanced SERD engine from the upset sp. from this 984 * derive serdname, the string used to identify the SERD engine. 985 */ 986 serdinst = eventprop_lookup(sp, L_engine); 987 988 if (serdinst == NULL) 989 return (NULL); 990 991 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 992 ipath(serdinst->u.stmt.np->u.event.epname)); 993 994 /* handle serd engine "id" property, if there is one */ 995 if ((nid = 996 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 997 struct evalue *gval; 998 char suffixbuf[200]; 999 char *suffix; 1000 char *nserdname; 1001 size_t nname; 1002 1003 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 1004 ptree_name_iter(O_ALTFP|O_NONL, nid); 1005 1006 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 1007 1008 if ((gval = lut_lookup(fmep->globals, 1009 (void *)nid->u.globid.s, NULL)) == NULL) { 1010 out(O_ALTFP, " undefined"); 1011 } else if (gval->t == UINT64) { 1012 out(O_ALTFP, " %llu", gval->v); 1013 (void) sprintf(suffixbuf, "%llu", gval->v); 1014 suffix = suffixbuf; 1015 } else { 1016 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 1017 suffix = (char *)(uintptr_t)gval->v; 1018 } 1019 1020 nname = strlen(serdname) + strlen(suffix) + 2; 1021 nserdname = MALLOC(nname); 1022 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 1023 FREE(serdname); 1024 serdname = nserdname; 
	}

	if (!fmd_serd_exists(hdl, serdname)) {
		struct node *nN, *nT;
		const char *s;
		struct node *nodep;
		struct config *cp;
		char *path;
		uint_t nval;
		hrtime_t tval;
		const char *name;
		char *serd_name;
		int i;
		char *ptr;
		int got_n_override = 0, got_t_override = 0;

		/* no SERD engine yet, so create it */
		nodep = serdinst->u.stmt.np->u.event.epname;
		name = serdinst->u.stmt.np->u.event.ename->u.name.s;
		path = ipath2str(NULL, ipath(nodep));
		cp = config_lookup(fmep->config, path, 0);
		FREE((void *)path);

		/*
		 * We allow serd paramaters to be overridden, either from
		 * eft.conf file values (if Serd_Override is set) or from
		 * driver properties (for "serd.io.device" engines).
		 */
		if (Serd_Override != NULL) {
			/*
			 * Serd_Override appears to be space-separated
			 * "name,N,T" entries.  NOTE(review): the parser
			 * assumes well-formed input -- strchr() results
			 * are dereferenced without NULL checks.
			 */
			char *save_ptr, *ptr1, *ptr2, *ptr3;
			ptr3 = save_ptr = STRDUP(Serd_Override);
			while (*ptr3 != '\0') {
				ptr1 = strchr(ptr3, ',');
				*ptr1 = '\0';
				if (strcmp(ptr3, name) == 0) {
					ptr2 = strchr(ptr1 + 1, ',');
					*ptr2 = '\0';
					nval = atoi(ptr1 + 1);
					out(O_ALTFP, "serd override %s_n %d",
					    name, nval);
					ptr3 = strchr(ptr2 + 1, ' ');
					if (ptr3)
						*ptr3 = '\0';
					ptr = STRDUP(ptr2 + 1);
					out(O_ALTFP, "serd override %s_t %s",
					    name, ptr);
					got_n_override = 1;
					got_t_override = 1;
					break;
				} else {
					ptr2 = strchr(ptr1 + 1, ',');
					ptr3 = strchr(ptr2 + 1, ' ');
					if (ptr3 == NULL)
						break;
				}
				ptr3++;
			}
			FREE(save_ptr);
		}

		if (cp && got_n_override == 0) {
			/*
			 * convert serd engine name into property name
			 * ('.' becomes '_', then "_n" is appended)
			 */
			serd_name = MALLOC(strlen(name) + 3);
			for (i = 0; i < strlen(name); i++) {
				if (name[i] == '.')
					serd_name[i] = '_';
				else
					serd_name[i] = name[i];
			}
			serd_name[i++] = '_';
			serd_name[i++] = 'n';
			serd_name[i] = '\0';
			if (s = config_getprop(cp, serd_name)) {
nval = atoi(s); 1101 out(O_ALTFP, "serd override %s_n %s", name, s); 1102 got_n_override = 1; 1103 } 1104 serd_name[i - 1] = 't'; 1105 if (s = config_getprop(cp, serd_name)) { 1106 ptr = STRDUP(s); 1107 out(O_ALTFP, "serd override %s_t %s", name, s); 1108 got_t_override = 1; 1109 } 1110 FREE(serd_name); 1111 } 1112 1113 if (!got_n_override) { 1114 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, 1115 NULL); 1116 ASSERT(nN->t == T_NUM); 1117 nval = (uint_t)nN->u.ull; 1118 } 1119 if (!got_t_override) { 1120 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, 1121 NULL); 1122 ASSERT(nT->t == T_TIMEVAL); 1123 tval = (hrtime_t)nT->u.ull; 1124 } else { 1125 const unsigned long long *ullp; 1126 const char *suffix; 1127 int len; 1128 1129 len = strspn(ptr, "0123456789"); 1130 suffix = stable(&ptr[len]); 1131 ullp = (unsigned long long *)lut_lookup(Timesuffixlut, 1132 (void *)suffix, NULL); 1133 ptr[len] = '\0'; 1134 tval = (unsigned long long)strtoul(ptr, NULL, 0) * 1135 (ullp ? *ullp : 1ll); 1136 FREE(ptr); 1137 } 1138 fmd_serd_create(hdl, serdname, nval, tval); 1139 } 1140 1141 newentp = MALLOC(sizeof (*newentp)); 1142 newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s); 1143 newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname); 1144 newentp->hdl = hdl; 1145 if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) { 1146 SerdEngines = lut_add(SerdEngines, (void *)newentp, 1147 (void *)newentp, (lut_cmp)serd_cmp); 1148 Serd_need_save = 1; 1149 serd_save(); 1150 } else { 1151 FREE(newentp); 1152 } 1153 1154 1155 /* 1156 * increment SERD engine. 
if engine fires, reset serd 1157 * engine and return trip_strcode 1158 */ 1159 if (fmd_serd_record(hdl, serdname, ffep)) { 1160 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 1161 (void *)L_trip, NULL); 1162 1163 ASSERT(tripinst != NULL); 1164 1165 *enamep = tripinst->u.event.ename->u.name.s; 1166 *ippp = ipath(tripinst->u.event.epname); 1167 1168 fmd_case_add_serd(hdl, fmcase, serdname); 1169 fmd_serd_reset(hdl, serdname); 1170 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 1171 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 1172 out(O_ALTFP, "]"); 1173 1174 FREE(serdname); 1175 return (1); 1176 } 1177 1178 FREE(serdname); 1179 return (0); 1180 } 1181 1182 /* 1183 * search a suspect list for upsets. feed each upset to serd_eval() and 1184 * build up tripped[], an array of ereports produced by the firing of 1185 * any SERD engines. then feed each ereport back into 1186 * fme_receive_report(). 1187 * 1188 * returns ntrip, the number of these ereports produced. 1189 */ 1190 static int 1191 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 1192 { 1193 /* we build an array of tripped ereports that we send ourselves */ 1194 struct { 1195 const char *ename; 1196 const struct ipath *ipp; 1197 } *tripped; 1198 struct event *sp; 1199 int ntrip, nupset, i; 1200 1201 /* 1202 * count the number of upsets to determine the upper limit on 1203 * expected trip ereport strings. remember that one upset can 1204 * lead to at most one ereport. 
1205 */ 1206 nupset = 0; 1207 for (sp = fmep->suspects; sp; sp = sp->suspects) { 1208 if (sp->t == N_UPSET) 1209 nupset++; 1210 } 1211 1212 if (nupset == 0) 1213 return (0); 1214 1215 /* 1216 * get to this point if we have upsets and expect some trip 1217 * ereports 1218 */ 1219 tripped = alloca(sizeof (*tripped) * nupset); 1220 bzero((void *)tripped, sizeof (*tripped) * nupset); 1221 1222 ntrip = 0; 1223 for (sp = fmep->suspects; sp; sp = sp->suspects) 1224 if (sp->t == N_UPSET && 1225 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 1226 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 1227 ntrip++; 1228 1229 for (i = 0; i < ntrip; i++) { 1230 struct event *ep, *nep; 1231 struct fme *nfmep; 1232 fmd_case_t *fmcase; 1233 const struct ipath *ipp; 1234 const char *eventstring; 1235 int prev_verbose; 1236 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1237 enum fme_state state; 1238 1239 /* 1240 * First try and evaluate a case with the trip ereport plus 1241 * all the other ereports that cause the trip. If that fails 1242 * to evaluate then try again with just this ereport on its own. 
1243 */ 1244 out(O_ALTFP|O_NONL, "fme_receive_report_serd: "); 1245 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1246 out(O_ALTFP|O_STAMP, NULL); 1247 ep = fmep->e0; 1248 eventstring = ep->enode->u.event.ename->u.name.s; 1249 ipp = ep->ipp; 1250 prune_propagations(eventstring, ipp); 1251 1252 /* 1253 * create a duplicate fme and case 1254 */ 1255 fmcase = fmd_case_open(fmep->hdl, NULL); 1256 out(O_ALTFP|O_NONL, "duplicate fme for event ["); 1257 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1258 out(O_ALTFP, " ]"); 1259 if ((nfmep = newfme(eventstring, ipp, fmep->hdl, 1260 fmcase)) == NULL) { 1261 out(O_ALTFP|O_NONL, "["); 1262 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1263 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1264 publish_undiagnosable(fmep->hdl, ffep, fmcase); 1265 continue; 1266 } 1267 Open_fme_count++; 1268 nfmep->pull = fmep->pull; 1269 init_fme_bufs(nfmep); 1270 out(O_ALTFP|O_NONL, "["); 1271 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1272 out(O_ALTFP, " created FME%d, case %s]", nfmep->id, 1273 fmd_case_uuid(nfmep->hdl, nfmep->fmcase)); 1274 if (ffep) { 1275 fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep); 1276 nfmep->e0r = ffep; 1277 } 1278 1279 /* 1280 * add the original ereports 1281 */ 1282 for (ep = fmep->observations; ep; ep = ep->observations) { 1283 eventstring = ep->enode->u.event.ename->u.name.s; 1284 ipp = ep->ipp; 1285 out(O_ALTFP|O_NONL, "adding event ["); 1286 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1287 out(O_ALTFP, " ]"); 1288 nep = itree_lookup(nfmep->eventtree, eventstring, ipp); 1289 if (nep->count++ == 0) { 1290 nep->observations = nfmep->observations; 1291 nfmep->observations = nep; 1292 serialize_observation(nfmep, eventstring, ipp); 1293 nep->nvp = evnv_dupnvl(ep->nvp); 1294 } 1295 if (ffep) 1296 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, 1297 ffep); 1298 stats_counter_bump(nfmep->Rcount); 1299 } 1300 1301 /* 1302 * add the serd trigger ereport 1303 */ 1304 if ((ep = 
itree_lookup(nfmep->eventtree, tripped[i].ename, 1305 tripped[i].ipp)) == NULL) { 1306 /* 1307 * The trigger ereport is not in the instance tree. It 1308 * was presumably removed by prune_propagations() as 1309 * this combination of events is not present in the 1310 * rules. 1311 */ 1312 out(O_ALTFP, "upsets_eval: e0 not in instance tree"); 1313 Undiag_reason = UD_BADEVENTI; 1314 goto retry_lone_ereport; 1315 } 1316 out(O_ALTFP|O_NONL, "adding event ["); 1317 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1318 out(O_ALTFP, " ]"); 1319 nfmep->ecurrent = ep; 1320 ep->nvp = NULL; 1321 ep->count = 1; 1322 ep->observations = nfmep->observations; 1323 nfmep->observations = ep; 1324 1325 /* 1326 * just peek first. 1327 */ 1328 nfmep->peek = 1; 1329 prev_verbose = Verbose; 1330 if (Debug == 0) 1331 Verbose = 0; 1332 lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep); 1333 state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay); 1334 nfmep->peek = 0; 1335 Verbose = prev_verbose; 1336 if (state == FME_DISPROVED) { 1337 out(O_ALTFP, "upsets_eval: hypothesis disproved"); 1338 Undiag_reason = UD_UNSOLVD; 1339 retry_lone_ereport: 1340 /* 1341 * However the trigger ereport on its own might be 1342 * diagnosable, so check for that. Undo the new fme 1343 * and case we just created and call fme_receive_report. 
1344 */ 1345 out(O_ALTFP|O_NONL, "["); 1346 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, 1347 tripped[i].ipp); 1348 out(O_ALTFP, " retrying with just trigger ereport]"); 1349 itree_free(nfmep->eventtree); 1350 nfmep->eventtree = NULL; 1351 structconfig_free(nfmep->config); 1352 nfmep->config = NULL; 1353 destroy_fme_bufs(nfmep); 1354 fmd_case_close(nfmep->hdl, nfmep->fmcase); 1355 fme_receive_report(fmep->hdl, ffep, 1356 tripped[i].ename, tripped[i].ipp, NULL); 1357 continue; 1358 } 1359 1360 /* 1361 * and evaluate 1362 */ 1363 serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp); 1364 if (ffep) 1365 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep); 1366 stats_counter_bump(nfmep->Rcount); 1367 fme_eval(nfmep, ffep); 1368 } 1369 1370 return (ntrip); 1371 } 1372 1373 /* 1374 * fme_receive_external_report -- call when an external ereport comes in 1375 * 1376 * this routine just converts the relevant information from the ereport 1377 * into a format used internally and passes it on to fme_receive_report(). 1378 */ 1379 void 1380 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 1381 const char *eventstring) 1382 { 1383 struct node *epnamenp = platform_getpath(nvl); 1384 const struct ipath *ipp; 1385 1386 /* 1387 * XFILE: If we ended up without a path, it's an X-file. 1388 * For now, use our undiagnosable interface. 
1389 */ 1390 if (epnamenp == NULL) { 1391 fmd_case_t *fmcase; 1392 1393 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 1394 Undiag_reason = UD_NOPATH; 1395 fmcase = fmd_case_open(hdl, NULL); 1396 publish_undiagnosable(hdl, ffep, fmcase); 1397 return; 1398 } 1399 1400 ipp = ipath(epnamenp); 1401 tree_free(epnamenp); 1402 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 1403 } 1404 1405 /*ARGSUSED*/ 1406 void 1407 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 1408 const char *eventstring) 1409 { 1410 char *uuid; 1411 nvlist_t **nva; 1412 uint_t nvc; 1413 const struct ipath *ipp; 1414 1415 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 || 1416 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 1417 &nva, &nvc) != 0) { 1418 out(O_ALTFP, "No uuid or fault list for list.repaired event"); 1419 return; 1420 } 1421 1422 out(O_ALTFP, "Processing list.repaired from case %s", uuid); 1423 1424 while (nvc-- != 0) { 1425 /* 1426 * Reset any istat or serd engine associated with this path. 
1427 */ 1428 char *path; 1429 1430 if ((ipp = platform_fault2ipath(*nva++)) == NULL) 1431 continue; 1432 1433 path = ipath2str(NULL, ipp); 1434 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s", 1435 path); 1436 FREE(path); 1437 1438 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp); 1439 istat_save(); 1440 1441 lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp); 1442 serd_save(); 1443 } 1444 } 1445 1446 /*ARGSUSED*/ 1447 void 1448 fme_receive_topology_change(void) 1449 { 1450 lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL); 1451 istat_save(); 1452 1453 lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL); 1454 serd_save(); 1455 } 1456 1457 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 1458 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 1459 1460 /* ARGSUSED */ 1461 static void 1462 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 1463 { 1464 struct bubble *bp; 1465 struct arrowlist *ap; 1466 1467 ep->cached_state = 0; 1468 ep->keep_in_tree = 0; 1469 for (bp = itree_next_bubble(ep, NULL); bp; 1470 bp = itree_next_bubble(ep, bp)) { 1471 if (bp->t != B_FROM) 1472 continue; 1473 bp->mark = 0; 1474 for (ap = itree_next_arrow(bp, NULL); ap; 1475 ap = itree_next_arrow(bp, ap)) 1476 ap->arrowp->mark = 0; 1477 } 1478 } 1479 1480 static void 1481 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 1482 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 1483 { 1484 struct event *ep; 1485 struct fme *fmep = NULL; 1486 struct fme *ofmep = NULL; 1487 struct fme *cfmep, *svfmep; 1488 int matched = 0; 1489 nvlist_t *defect; 1490 fmd_case_t *fmcase; 1491 1492 out(O_ALTFP|O_NONL, "fme_receive_report: "); 1493 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1494 out(O_ALTFP|O_STAMP, NULL); 1495 1496 /* decide which FME it goes to */ 1497 for (fmep = FMElist; fmep; fmep = fmep->next) { 1498 int prev_verbose; 1499 unsigned long long my_delay = 
TIMEVAL_EVENTUALLY; 1500 enum fme_state state; 1501 nvlist_t *pre_peek_nvp = NULL; 1502 1503 if (fmep->overflow) { 1504 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1505 ofmep = fmep; 1506 1507 continue; 1508 } 1509 1510 /* 1511 * ignore solved or closed cases 1512 */ 1513 if (fmep->posted_suspects || 1514 fmd_case_solved(fmep->hdl, fmep->fmcase) || 1515 fmd_case_closed(fmep->hdl, fmep->fmcase)) 1516 continue; 1517 1518 /* look up event in event tree for this FME */ 1519 if ((ep = itree_lookup(fmep->eventtree, 1520 eventstring, ipp)) == NULL) 1521 continue; 1522 1523 /* note observation */ 1524 fmep->ecurrent = ep; 1525 if (ep->count++ == 0) { 1526 /* link it into list of observations seen */ 1527 ep->observations = fmep->observations; 1528 fmep->observations = ep; 1529 ep->nvp = evnv_dupnvl(nvl); 1530 } else { 1531 /* use new payload values for peek */ 1532 pre_peek_nvp = ep->nvp; 1533 ep->nvp = evnv_dupnvl(nvl); 1534 } 1535 1536 /* tell hypothesise() not to mess with suspect list */ 1537 fmep->peek = 1; 1538 1539 /* don't want this to be verbose (unless Debug is set) */ 1540 prev_verbose = Verbose; 1541 if (Debug == 0) 1542 Verbose = 0; 1543 1544 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1545 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1546 1547 fmep->peek = 0; 1548 1549 /* put verbose flag back */ 1550 Verbose = prev_verbose; 1551 1552 if (state != FME_DISPROVED) { 1553 /* found an FME that explains the ereport */ 1554 matched++; 1555 out(O_ALTFP|O_NONL, "["); 1556 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1557 out(O_ALTFP, " explained by FME%d]", fmep->id); 1558 1559 if (pre_peek_nvp) 1560 nvlist_free(pre_peek_nvp); 1561 1562 if (ep->count == 1) 1563 serialize_observation(fmep, eventstring, ipp); 1564 1565 if (ffep) 1566 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1567 1568 stats_counter_bump(fmep->Rcount); 1569 1570 /* re-eval FME */ 1571 fme_eval(fmep, ffep); 1572 } else { 1573 1574 /* not a match, undo noting 
of observation */ 1575 fmep->ecurrent = NULL; 1576 if (--ep->count == 0) { 1577 /* unlink it from observations */ 1578 fmep->observations = ep->observations; 1579 ep->observations = NULL; 1580 nvlist_free(ep->nvp); 1581 ep->nvp = NULL; 1582 } else { 1583 nvlist_free(ep->nvp); 1584 ep->nvp = pre_peek_nvp; 1585 } 1586 } 1587 } 1588 1589 if (matched) 1590 return; /* explained by at least one existing FME */ 1591 1592 /* clean up closed fmes */ 1593 cfmep = ClosedFMEs; 1594 while (cfmep != NULL) { 1595 svfmep = cfmep->next; 1596 destroy_fme(cfmep); 1597 cfmep = svfmep; 1598 } 1599 ClosedFMEs = NULL; 1600 prune_propagations(eventstring, ipp); 1601 1602 if (ofmep) { 1603 out(O_ALTFP|O_NONL, "["); 1604 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1605 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1606 if (ffep) 1607 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1608 1609 return; 1610 1611 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1612 out(O_ALTFP|O_NONL, "["); 1613 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1614 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1615 1616 fmcase = fmd_case_open(hdl, NULL); 1617 1618 /* Create overflow fme */ 1619 if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) { 1620 out(O_ALTFP|O_NONL, "["); 1621 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1622 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1623 publish_undiagnosable(hdl, ffep, fmcase); 1624 return; 1625 } 1626 1627 Open_fme_count++; 1628 1629 init_fme_bufs(fmep); 1630 fmep->overflow = B_TRUE; 1631 1632 if (ffep) 1633 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1634 1635 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1636 NULL, NULL, NULL); 1637 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1638 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1639 fmd_case_solve(hdl, fmep->fmcase); 1640 return; 1641 } 1642 1643 /* open a case */ 1644 fmcase = fmd_case_open(hdl, NULL); 1645 1646 /* start a new FME */ 1647 if ((fmep = newfme(eventstring, 
ipp, hdl, fmcase)) == NULL) { 1648 out(O_ALTFP|O_NONL, "["); 1649 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1650 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1651 publish_undiagnosable(hdl, ffep, fmcase); 1652 return; 1653 } 1654 1655 Open_fme_count++; 1656 1657 init_fme_bufs(fmep); 1658 1659 out(O_ALTFP|O_NONL, "["); 1660 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1661 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1662 fmd_case_uuid(hdl, fmep->fmcase)); 1663 1664 ep = fmep->e0; 1665 ASSERT(ep != NULL); 1666 1667 /* note observation */ 1668 fmep->ecurrent = ep; 1669 if (ep->count++ == 0) { 1670 /* link it into list of observations seen */ 1671 ep->observations = fmep->observations; 1672 fmep->observations = ep; 1673 ep->nvp = evnv_dupnvl(nvl); 1674 serialize_observation(fmep, eventstring, ipp); 1675 } else { 1676 /* new payload overrides any previous */ 1677 nvlist_free(ep->nvp); 1678 ep->nvp = evnv_dupnvl(nvl); 1679 } 1680 1681 stats_counter_bump(fmep->Rcount); 1682 1683 if (ffep) { 1684 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1685 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1686 fmep->e0r = ffep; 1687 } 1688 1689 /* give the diagnosis algorithm a shot at the new FME state */ 1690 fme_eval(fmep, ffep); 1691 } 1692 1693 void 1694 fme_status(int flags) 1695 { 1696 struct fme *fmep; 1697 1698 if (FMElist == NULL) { 1699 out(flags, "No fault management exercises underway."); 1700 return; 1701 } 1702 1703 for (fmep = FMElist; fmep; fmep = fmep->next) 1704 fme_print(flags, fmep); 1705 } 1706 1707 /* 1708 * "indent" routines used mostly for nicely formatted debug output, but also 1709 * for sanity checking for infinite recursion bugs. 
1710 */ 1711 1712 #define MAX_INDENT 1024 1713 static const char *indent_s[MAX_INDENT]; 1714 static int current_indent; 1715 1716 static void 1717 indent_push(const char *s) 1718 { 1719 if (current_indent < MAX_INDENT) 1720 indent_s[current_indent++] = s; 1721 else 1722 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1723 } 1724 1725 static void 1726 indent_set(const char *s) 1727 { 1728 current_indent = 0; 1729 indent_push(s); 1730 } 1731 1732 static void 1733 indent_pop(void) 1734 { 1735 if (current_indent > 0) 1736 current_indent--; 1737 else 1738 out(O_DIE, "recursion underflow"); 1739 } 1740 1741 static void 1742 indent(void) 1743 { 1744 int i; 1745 if (!Verbose) 1746 return; 1747 for (i = 0; i < current_indent; i++) 1748 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1749 } 1750 1751 #define SLNEW 1 1752 #define SLCHANGED 2 1753 #define SLWAIT 3 1754 #define SLDISPROVED 4 1755 1756 static void 1757 print_suspects(int circumstance, struct fme *fmep) 1758 { 1759 struct event *ep; 1760 1761 out(O_ALTFP|O_NONL, "["); 1762 if (circumstance == SLCHANGED) { 1763 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. 
state: %s, " 1764 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1765 } else if (circumstance == SLWAIT) { 1766 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id, 1767 fmep->timer); 1768 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1769 } else if (circumstance == SLDISPROVED) { 1770 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1771 } else { 1772 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1773 } 1774 1775 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1776 out(O_ALTFP, "]"); 1777 return; 1778 } 1779 1780 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1781 out(O_ALTFP|O_NONL, " "); 1782 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1783 } 1784 out(O_ALTFP, "]"); 1785 } 1786 1787 static struct node * 1788 eventprop_lookup(struct event *ep, const char *propname) 1789 { 1790 return (lut_lookup(ep->props, (void *)propname, NULL)); 1791 } 1792 1793 #define MAXDIGITIDX 23 1794 static char numbuf[MAXDIGITIDX + 1]; 1795 1796 static int 1797 node2uint(struct node *n, uint_t *valp) 1798 { 1799 struct evalue value; 1800 struct lut *globals = NULL; 1801 1802 if (n == NULL) 1803 return (1); 1804 1805 /* 1806 * check value.v since we are being asked to convert an unsigned 1807 * long long int to an unsigned int 1808 */ 1809 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1810 value.t != UINT64 || value.v > (1ULL << 32)) 1811 return (1); 1812 1813 *valp = (uint_t)value.v; 1814 1815 return (0); 1816 } 1817 1818 static nvlist_t * 1819 node2fmri(struct node *n) 1820 { 1821 nvlist_t **pa, *f, *p; 1822 struct node *nc; 1823 uint_t depth = 0; 1824 char *numstr, *nullbyte; 1825 char *failure; 1826 int err, i; 1827 1828 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1829 if (n == NULL || n->t != T_NAME) 1830 return (NULL); 1831 1832 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1833 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1834 break; 1835 depth++; 1836 } 1837 1838 if (nc != NULL) { 1839 /* We bailed early, something went wrong */ 1840 return (NULL); 1841 } 1842 1843 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1844 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1845 pa = alloca(depth * sizeof (nvlist_t *)); 1846 for (i = 0; i < depth; i++) 1847 pa[i] = NULL; 1848 1849 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1850 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1851 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1852 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1853 if (err != 0) { 1854 failure = "basic construction of FMRI failed"; 1855 goto boom; 1856 } 1857 1858 numbuf[MAXDIGITIDX] = '\0'; 1859 nullbyte = &numbuf[MAXDIGITIDX]; 1860 i = 0; 1861 1862 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1863 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1864 if (err != 0) { 1865 failure = "alloc of an hc-pair failed"; 1866 goto boom; 1867 } 1868 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1869 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1870 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1871 if (err != 0) { 1872 failure = "construction of an hc-pair failed"; 1873 goto boom; 1874 } 1875 pa[i++] = p; 1876 } 1877 1878 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1879 if (err == 0) { 1880 for (i = 0; i < depth; i++) 1881 if (pa[i] != NULL) 1882 nvlist_free(pa[i]); 1883 return (f); 1884 } 1885 failure = "addition of hc-pair array to FMRI failed"; 1886 1887 boom: 1888 for (i = 0; i < depth; i++) 1889 if (pa[i] != NULL) 1890 nvlist_free(pa[i]); 1891 nvlist_free(f); 1892 out(O_DIE, "%s", failure); 1893 /*NOTREACHED*/ 1894 return (NULL); 1895 } 1896 1897 /* an ipath cache entry is an array 
of these, with s==NULL at the end */ 1898 struct ipath { 1899 const char *s; /* component name (in stable) */ 1900 int i; /* instance number */ 1901 }; 1902 1903 static nvlist_t * 1904 ipath2fmri(struct ipath *ipath) 1905 { 1906 nvlist_t **pa, *f, *p; 1907 uint_t depth = 0; 1908 char *numstr, *nullbyte; 1909 char *failure; 1910 int err, i; 1911 struct ipath *ipp; 1912 1913 for (ipp = ipath; ipp->s != NULL; ipp++) 1914 depth++; 1915 1916 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1917 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1918 pa = alloca(depth * sizeof (nvlist_t *)); 1919 for (i = 0; i < depth; i++) 1920 pa[i] = NULL; 1921 1922 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1923 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1924 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1925 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1926 if (err != 0) { 1927 failure = "basic construction of FMRI failed"; 1928 goto boom; 1929 } 1930 1931 numbuf[MAXDIGITIDX] = '\0'; 1932 nullbyte = &numbuf[MAXDIGITIDX]; 1933 i = 0; 1934 1935 for (ipp = ipath; ipp->s != NULL; ipp++) { 1936 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1937 if (err != 0) { 1938 failure = "alloc of an hc-pair failed"; 1939 goto boom; 1940 } 1941 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s); 1942 numstr = ulltostr(ipp->i, nullbyte); 1943 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1944 if (err != 0) { 1945 failure = "construction of an hc-pair failed"; 1946 goto boom; 1947 } 1948 pa[i++] = p; 1949 } 1950 1951 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1952 if (err == 0) { 1953 for (i = 0; i < depth; i++) 1954 if (pa[i] != NULL) 1955 nvlist_free(pa[i]); 1956 return (f); 1957 } 1958 failure = "addition of hc-pair array to FMRI failed"; 1959 1960 boom: 1961 for (i = 0; i < depth; i++) 1962 if (pa[i] != NULL) 1963 nvlist_free(pa[i]); 1964 nvlist_free(f); 1965 out(O_DIE, "%s", failure); 1966 
/*NOTREACHED*/ 1967 return (NULL); 1968 } 1969 1970 static uint_t 1971 avg(uint_t sum, uint_t cnt) 1972 { 1973 unsigned long long s = sum * 10; 1974 1975 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1976 } 1977 1978 static uint8_t 1979 percentof(uint_t part, uint_t whole) 1980 { 1981 unsigned long long p = part * 1000; 1982 1983 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1984 } 1985 1986 struct rsl { 1987 struct event *suspect; 1988 nvlist_t *asru; 1989 nvlist_t *fru; 1990 nvlist_t *rsrc; 1991 }; 1992 1993 /* 1994 * rslfree -- free internal members of struct rsl not expected to be 1995 * freed elsewhere. 1996 */ 1997 static void 1998 rslfree(struct rsl *freeme) 1999 { 2000 if (freeme->asru != NULL) 2001 nvlist_free(freeme->asru); 2002 if (freeme->fru != NULL) 2003 nvlist_free(freeme->fru); 2004 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 2005 nvlist_free(freeme->rsrc); 2006 } 2007 2008 /* 2009 * rslcmp -- compare two rsl structures. Use the following 2010 * comparisons to establish cardinality: 2011 * 2012 * 1. Name of the suspect's class. (simple strcmp) 2013 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 2014 * 2015 */ 2016 static int 2017 rslcmp(const void *a, const void *b) 2018 { 2019 struct rsl *r1 = (struct rsl *)a; 2020 struct rsl *r2 = (struct rsl *)b; 2021 int rv; 2022 2023 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 2024 r2->suspect->enode->u.event.ename->u.name.s); 2025 if (rv != 0) 2026 return (rv); 2027 2028 if (r1->asru == NULL && r2->asru == NULL) 2029 return (0); 2030 if (r1->asru == NULL) 2031 return (-1); 2032 if (r2->asru == NULL) 2033 return (1); 2034 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 2035 } 2036 2037 /* 2038 * rsluniq -- given an array of rsl structures, seek out and "remove" 2039 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 2040 * of the array element. 
Removal also means updating the number of 2041 * problems and the number of problems which are not faults. User 2042 * provides the first and last element pointers. 2043 */ 2044 static void 2045 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 2046 { 2047 struct rsl *cr; 2048 2049 if (*nprobs == 1) 2050 return; 2051 2052 /* 2053 * At this point, we only expect duplicate defects. 2054 * Eversholt's diagnosis algorithm prevents duplicate 2055 * suspects, but we rewrite defects in the platform code after 2056 * the diagnosis is made, and that can introduce new 2057 * duplicates. 2058 */ 2059 while (first <= last) { 2060 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 2061 first++; 2062 continue; 2063 } 2064 cr = first + 1; 2065 while (cr <= last) { 2066 if (is_defect(first->suspect->t)) { 2067 if (rslcmp(first, cr) == 0) { 2068 cr->suspect = NULL; 2069 rslfree(cr); 2070 (*nprobs)--; 2071 (*nnonf)--; 2072 } 2073 } 2074 /* 2075 * assume all defects are in order after our 2076 * sort and short circuit here with "else break" ? 2077 */ 2078 cr++; 2079 } 2080 first++; 2081 } 2082 } 2083 2084 /* 2085 * get_resources -- for a given suspect, determine what ASRU, FRU and 2086 * RSRC nvlists should be advertised in the final suspect list. 2087 */ 2088 void 2089 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 2090 { 2091 struct node *asrudef, *frudef; 2092 nvlist_t *asru, *fru; 2093 nvlist_t *rsrc = NULL; 2094 char *pathstr; 2095 2096 /* 2097 * First find any ASRU and/or FRU defined in the 2098 * initial fault tree. 
2099 */ 2100 asrudef = eventprop_lookup(sp, L_ASRU); 2101 frudef = eventprop_lookup(sp, L_FRU); 2102 2103 /* 2104 * Create FMRIs based on those definitions 2105 */ 2106 asru = node2fmri(asrudef); 2107 fru = node2fmri(frudef); 2108 pathstr = ipath2str(NULL, sp->ipp); 2109 2110 /* 2111 * Allow for platform translations of the FMRIs 2112 */ 2113 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 2114 pathstr); 2115 2116 FREE(pathstr); 2117 rsrcs->suspect = sp; 2118 rsrcs->asru = asru; 2119 rsrcs->fru = fru; 2120 rsrcs->rsrc = rsrc; 2121 } 2122 2123 /* 2124 * trim_suspects -- prior to publishing, we may need to remove some 2125 * suspects from the list. If we're auto-closing upsets, we don't 2126 * want any of those in the published list. If the ASRUs for multiple 2127 * defects resolve to the same ASRU (driver) we only want to publish 2128 * that as a single suspect. 2129 */ 2130 static void 2131 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 2132 struct rsl **end) 2133 { 2134 struct event *ep; 2135 struct rsl *rp; 2136 int rpcnt; 2137 2138 /* 2139 * First save the suspects in the psuspects, then copy back 2140 * only the ones we wish to retain. This resets nsuspects to 2141 * zero. 2142 */ 2143 rpcnt = fmep->nsuspects; 2144 save_suspects(fmep); 2145 2146 /* 2147 * allocate an array of resource pointers for the suspects. 2148 * We may end up using less than the full allocation, but this 2149 * is a very short-lived array. publish_suspects() will free 2150 * this array when it's done using it. 
 */
	rp = *begin = MALLOC(rpcnt * sizeof (struct rsl));
	bzero(rp, rpcnt * sizeof (struct rsl));

	/* first pass, remove any unwanted upsets and populate our array */
	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
		if (no_upsets && is_upset(ep->t))
			continue;
		get_resources(ep, rp, fmep->config);
		rp++;
		fmep->nsuspects++;
		if (!is_fault(ep->t))
			fmep->nonfault++;
	}

	/* if all we had was unwanted upsets, we're done */
	if (fmep->nsuspects == 0)
		return;

	*end = rp - 1;

	/* sort the array */
	qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp);
	rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault);
}

/*
 * addpayloadprop -- add a payload prop to a problem
 *
 * The evalue's type selects whether the prop is added to the fault
 * nvlist as a uint64 or as a string; failure to add is fatal (O_DIE).
 */
static void
addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
{
	ASSERT(fault != NULL);
	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if (rhs->t == UINT64) {
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);

		if (nvlist_add_uint64(fault, lhs, rhs->v) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	} else {
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
		    lhs, (char *)(uintptr_t)rhs->v);

		if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) !=
		    0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	}
}

/* scratch state shared by the istat serialization walk callbacks below */
static char *Istatbuf;
static char *Istatbufptr;
static int Istatsz;

/*
 * istataddsize -- calculate size of istat and add it to Istatsz
 */
/*ARGSUSED2*/
static void
istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
{
	int val;

	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if ((val = stats_counter_value(rhs)) == 0)
		return;	/* skip zero-valued stats */

	/* count up the size of the stat name */
	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
	Istatsz++;	/* for the trailing NULL byte */

	/* count up the size of the stat value */
	Istatsz += snprintf(NULL, 0, "%d", val);
	Istatsz++;	/* for the trailing NULL byte */
}

/*
 * istat2str -- serialize an istat, writing result to *Istatbufptr
 *
 * Layout per stat: "<name>\0<decimal value>\0", matching the sizing
 * done by istataddsize() (zero-valued stats skipped by both).
 */
/*ARGSUSED2*/
static void
istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
{
	char *str;
	int len;
	int val;

	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if ((val = stats_counter_value(rhs)) == 0)
		return;	/* skip zero-valued stats */

	/* serialize the stat name */
	str = ipath2str(lhs->ename, lhs->ipath);
	len = strlen(str);

	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
	Istatbufptr += len;
	FREE(str);
	*Istatbufptr++ = '\0';

	/* serialize the stat value */
	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
	    "%d", val);
	*Istatbufptr++ = '\0';

	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
}

/*
 * istat_save -- serialize all non-zero istats and persist them in the
 * WOBUF_ISTATS fmd buffer; no-op unless Istat_need_save is set.
 */
void
istat_save()
{
	if (Istat_need_save == 0)
		return;

	/* figure out how big the serialzed info is */
	Istatsz = 0;
	lut_walk(Istats, (lut_cb)istataddsize, NULL);

	if (Istatsz == 0) {
		/* no stats to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
		return;
	}

	/* create the serialized buffer */
	Istatbufptr = Istatbuf = MALLOC(Istatsz);
	lut_walk(Istats, (lut_cb)istat2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
	FREE(Istatbuf);

	Istat_need_save = 0;
}

/*
 * istat_cmp -- lut comparison for istat entries; compares interned
 * ename/ipath pointers, so it orders by address, not lexically.
 */
int
istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
{
	if (ent1->ename != ent2->ename)
		return (ent2->ename - ent1->ename);
	if (ent1->ipath != ent2->ipath)
		return ((char *)ent2->ipath - (char *)ent1->ipath);

	return (0);
}

/*
 * istat-verify -- verify the component associated with a stat still exists
 *
 * if the component no longer exists, this routine resets the stat and
 * returns 0. if the component still exists, it returns 1.
 */
static int
istat_verify(struct node *snp, struct istat_entry *entp)
{
	struct stats *statp;
	nvlist_t *fmri;

	fmri = node2fmri(snp->u.event.epname);
	if (platform_path_exists(fmri)) {
		nvlist_free(fmri);
		return (1);
	}
	nvlist_free(fmri);

	/* component no longer in system.  zero out the associated stats */
	if ((statp = (struct stats *)
	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
	    stats_counter_value(statp) == 0)
		return (0);	/* stat is already reset */

	Istat_need_save = 1;
	stats_counter_reset(statp);
	return (0);
}

/*
 * istat_bump -- bump (n == 0) or set (n != 0) the stat counter for the
 * event named by snp, creating the counter on first use.  The counter
 * name is "ename@component/path" built from the event node.
 */
static void
istat_bump(struct node *snp, int n)
{
	struct stats *statp;
	struct istat_entry ent;

	ASSERT(snp != NULL);
	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
	ASSERT(snp->u.event.epname != NULL);

	/* class name should be hoisted into a single stable entry */
	ASSERT(snp->u.event.ename->u.name.next == NULL);
	ent.ename = snp->u.event.ename->u.name.s;
	ent.ipath = ipath(snp->u.event.epname);

	if (!istat_verify(snp, &ent)) {
		/* component no longer exists in system, nothing to do */
		return;
	}

	if ((statp = (struct stats *)
	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
		/* need to create the counter */
		int cnt = 0;
		struct node *np;
		char *sname;
		char *snamep;
		struct istat_entry *newentp;

		/* count up the size of the stat name */
		np = snp->u.event.ename;
		while (np != NULL) {
			cnt += strlen(np->u.name.s);
			cnt++;	/* for the '.' or '@' */
			np = np->u.name.next;
		}
		np = snp->u.event.epname;
		while (np != NULL) {
			cnt += snprintf(NULL, 0, "%s%llu",
			    np->u.name.s, np->u.name.child->u.ull);
			cnt++;	/* for the '/' or trailing NULL byte */
			np = np->u.name.next;
		}

		/* build the stat name */
		snamep = sname = alloca(cnt);
		np = snp->u.event.ename;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s", np->u.name.s);
			np = np->u.name.next;
			if (np)
				*snamep++ = '.';
		}
		*snamep++ = '@';
		np = snp->u.event.epname;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
			np = np->u.name.next;
			if (np)
				*snamep++ = '/';
		}
		*snamep++ = '\0';

		/* create the new stat & add it to our list */
		newentp = MALLOC(sizeof (*newentp));
		*newentp = ent;
		statp = stats_new_counter(NULL, sname, 0);
		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
		    (lut_cmp)istat_cmp);
	}

	/* if n is non-zero, set that value instead of bumping */
	if (n) {
		stats_counter_reset(statp);
		stats_counter_add(statp, n);
	} else
		stats_counter_bump(statp);
	Istat_need_save = 1;

	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
	    stats_counter_value(statp));
}

/*
 * istat_destructor -- lut_free callback; frees the entry key and
 * deletes the stats counter stored as the value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *entp = (struct istat_entry *)left;
	struct stats *statp = (struct stats *)right;
	FREE(entp);
	stats_delete(statp);
}

/*
 * Callback used in a walk of the Istats to reset matching stat counters.
 */
static void
istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
    const struct ipath *ipp)
{
	char *path;

	/* reset only the stats whose (interned) ipath matches */
	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
}

/*
 * istat_counter_topo_chg_cb -- Istats walk callback run on topology
 * change; resets counters whose component path no longer exists.
 */
/*ARGSUSED*/
static void
istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
    void *unused)
{
	char *path;
	nvlist_t *fmri;

	fmri = ipath2fmri((struct ipath *)(entp->ipath));
	if (!platform_path_exists(fmri)) {
		path = ipath2str(entp->ename, entp->ipath);
		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
	nvlist_free(fmri);
}

void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}

/* scratch state shared by the serd serialization walk callbacks below */
static char *Serdbuf;
static char *Serdbufptr;
static int Serdsz;

/*
 * serdaddsize -- calculate size of serd and add it to Serdsz
 */
/*ARGSUSED*/
static void
serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
{
	ASSERT(lhs != NULL);

	/* count up the size of the stat name */
	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
	Serdsz++;	/* for the trailing NULL byte */
}

/*
 * serd2str -- serialize a serd engine, writing result to *Serdbufptr
 *
 * Layout per engine: "<ename@path>\0", matching serdaddsize().
 */
/*ARGSUSED*/
static void
serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
{
	char *str;
	int len;

	ASSERT(lhs != NULL);

	/* serialize the serd engine name */
	str = ipath2str(lhs->ename, lhs->ipath);
	len = strlen(str);

	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
	Serdbufptr += len;
	FREE(str);
	*Serdbufptr++ = '\0';
	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
}

/*
 * serd_save -- serialize the known serd engines and persist them in the
 * WOBUF_SERDS fmd buffer; no-op unless Serd_need_save is set.
 */
void
serd_save()
{
	if (Serd_need_save == 0)
		return;

	/* figure out how big the serialzed info is */
	Serdsz = 0;
	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);

	if (Serdsz == 0) {
		/* no serd engines to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
		return;
	}

	/* create the serialized buffer */
	Serdbufptr = Serdbuf = MALLOC(Serdsz);
	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
	FREE(Serdbuf);
	Serd_need_save = 0;
}

/*
 * serd_cmp -- lut comparison for serd entries; compares interned
 * ename/ipath pointers, so it orders by address, not lexically.
 */
int
serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
{
	if (ent1->ename != ent2->ename)
		return (ent2->ename - ent1->ename);
	if (ent1->ipath != ent2->ipath)
		return ((char *)ent2->ipath - (char *)ent1->ipath);

	return (0);
}

/*
 * fme_serd_load -- rebuild SerdEngines from the WOBUF_SERDS buffer,
 * dropping entries whose component path no longer exists (and marking
 * the buffer for re-save if any were dropped).
 */
void
fme_serd_load(fmd_hdl_t *hdl)
{
	int sz;
	char *sbuf;
	char *sepptr;
	char *ptr;
	struct serd_entry *newentp;
	struct node *epname;
	nvlist_t *fmri;
	char *namestring;

	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
		return;
	sbuf = alloca(sz);
	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
	ptr = sbuf;
	while (ptr < &sbuf[sz]) {
		/* each record is "ename@path\0"; split at the '@' */
		sepptr = strchr(ptr, '@');
		*sepptr = '\0';
		namestring = ptr;
		sepptr++;
		ptr = sepptr;
		ptr += strlen(ptr);
		ptr++;	/* move past the '\0' separating paths */
		epname = pathstring2epnamenp(sepptr);
		fmri = node2fmri(epname);
		if (platform_path_exists(fmri)) {
			newentp = MALLOC(sizeof (*newentp));
			newentp->hdl = hdl;
			newentp->ipath = ipath(epname);
			newentp->ename =
stable(namestring);
			SerdEngines = lut_add(SerdEngines, (void *)newentp,
			    (void *)newentp, (lut_cmp)serd_cmp);
		} else
			Serd_need_save = 1;
		tree_free(epname);
		nvlist_free(fmri);
	}
	/* save it back again in case some of the paths no longer exist */
	serd_save();
}

/*
 * serd_destructor -- lut_free callback; frees the serd entry (the lut
 * value is the same pointer as the key, so only one free is needed).
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *entp = (struct serd_entry *)left;
	FREE(entp);
}

/*
 * Callback used in a walk of the SerdEngines to reset matching serd engines.
 */
/*ARGSUSED*/
static void
serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
{
	char *path;

	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
		fmd_serd_reset(entp->hdl, path);
		FREE(path);
		Serd_need_save = 1;
	}
}

/*
 * serd_topo_chg_cb -- SerdEngines walk callback run on topology change;
 * resets engines whose component path no longer exists.
 */
/*ARGSUSED*/
static void
serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
{
	char *path;
	nvlist_t *fmri;

	fmri = ipath2fmri((struct ipath *)(entp->ipath));
	if (!platform_path_exists(fmri)) {
		path = ipath2str(entp->ename, entp->ipath);
		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
		fmd_serd_reset(entp->hdl, path);
		FREE(path);
		Serd_need_save = 1;
	}
	nvlist_free(fmri);
}

void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}

/*
 * publish_suspects -- convert the FME's suspect list into fmd faults,
 * compute per-suspect certainty from FITrates, add the faults to the
 * case, and either solve the case or (with Dupclose set and all asrus
 * already faulty) close it as a duplicate.
 */
static void
publish_suspects(struct fme *fmep)
{
	struct rsl *srl = NULL;
	struct rsl *erl;
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t no_upsets = B_FALSE;
	boolean_t allfaulty = B_TRUE;

	stats_counter_bump(fmep->diags);

	/*
	 * If we're auto-closing upsets, we don't want to include them
	 * in any produced suspect lists or certainty accounting.
	 */
	if (Autoclose != NULL)
		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0 ||
		    strcmp(Autoclose, "upsets") == 0)
			no_upsets = B_TRUE;

	trim_suspects(fmep, no_upsets, &srl, &erl);

	/*
	 * If the resulting suspect list has no members, we're
	 * done. Returning here will simply close the case.
	 */
	if (fmep->nsuspects == 0) {
		out(O_ALTFP,
		    "[FME%d, case %s (all suspects are upsets)]",
		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
		FREE(srl);
		restore_suspects(fmep);
		return;
	}

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults. The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				/* placeholder; filled with fravg below */
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		fravg = avg(frsum, frcnt);
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}
		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it; this must be done
		 * before the dupclose check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_faulty. This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		/*
		 * if "dupclose" tunable is set, check if the asru is
		 * already marked as "faulty".
		 */
		if (Dupclose && allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FMD%d dupclose check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	/*
	 * Close the case if all asrus are already known to be faulty and if
	 * Dupclose is enabled. Otherwise we are going to publish so take
	 * any pre-publication actions.
	 */
	if (Dupclose && allfaulty) {
		out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_close(fmep->hdl, fmep->fmcase);
	} else {
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */

		out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_solve(fmep->hdl, fmep->fmcase);
	}

	/*
	 * revert to the original suspect list
	 */
	FREE(srl);
	restore_suspects(fmep);
}

/*
 * publish_undiagnosable -- open, solve and immediately close a new case
 * carrying an "undiagnosable" defect for an ereport we cannot diagnose.
 */
static void
publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
{
	struct case_list *newcase;
	nvlist_t *defect;

	out(O_ALTFP,
	    "[undiagnosable ereport received, "
2888 "creating and closing a new case (%s)]", 2889 Undiag_reason ? Undiag_reason : "reason not provided"); 2890 2891 newcase = MALLOC(sizeof (struct case_list)); 2892 newcase->next = NULL; 2893 newcase->fmcase = fmcase; 2894 if (Undiagablecaselist != NULL) 2895 newcase->next = Undiagablecaselist; 2896 Undiagablecaselist = newcase; 2897 2898 if (ffep != NULL) 2899 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 2900 2901 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 2902 NULL, NULL, NULL); 2903 if (Undiag_reason != NULL) 2904 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2905 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 2906 2907 fmd_case_solve(hdl, newcase->fmcase); 2908 fmd_case_close(hdl, newcase->fmcase); 2909 } 2910 2911 static void 2912 fme_undiagnosble_pci(struct fme *f, nvlist_t *rc_detector) { 2913 nvlist_t *defect, *asru; 2914 char *path; 2915 2916 (void) nvlist_lookup_string(rc_detector, FM_FMRI_DEV_PATH, &path); 2917 out(O_ALTFP, "[solving/closing PCIE FME%d PATH %s]", f->id, path); 2918 2919 (void) nvlist_xalloc(&asru, NV_UNIQUE_NAME, &Eft_nv_hdl); 2920 (void) nvlist_add_uint8(asru, FM_VERSION, FM_HC_SCHEME_VERSION); 2921 (void) nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); 2922 (void) nvlist_add_string(asru, FM_FMRI_DEV_PATH, path); 2923 2924 defect = fmd_nvl_create_fault(f->hdl, 2925 "fault.sunos.eft.unknown_pci_fault", 100, 2926 asru, NULL, NULL); 2927 2928 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_PCIUNSOLVD); 2929 fmd_case_pci_undiagnosable(f->hdl, f->fmcase, defect); 2930 2931 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2932 fmd_case_solve(f->hdl, f->fmcase); 2933 fmd_case_close(f->hdl, f->fmcase); 2934 } 2935 2936 static void 2937 fme_undiagnosable(struct fme *f) 2938 { 2939 nvlist_t *defect; 2940 nvlist_t *rc_detector; 2941 2942 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 2943 f->id, fmd_case_uuid(f->hdl, f->fmcase), 2944 Undiag_reason ? 
Undiag_reason : "undiagnosable"); 2945 2946 if ((strcmp(Undiag_reason, UD_UNSOLVD) == 0) && 2947 fmd_case_is_pcie(f->hdl, f->fmcase, &rc_detector)) { 2948 fme_undiagnosble_pci(f, rc_detector); 2949 return; 2950 } 2951 2952 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 2953 NULL, NULL, NULL); 2954 if (Undiag_reason != NULL) 2955 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2956 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2957 fmd_case_solve(f->hdl, f->fmcase); 2958 fmd_case_close(f->hdl, f->fmcase); 2959 } 2960 2961 /* 2962 * fme_close_case 2963 * 2964 * Find the requested case amongst our fmes and close it. Free up 2965 * the related fme. 2966 */ 2967 void 2968 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2969 { 2970 struct case_list *ucasep, *prevcasep = NULL; 2971 struct fme *prev = NULL; 2972 struct fme *fmep; 2973 2974 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2975 if (fmcase != ucasep->fmcase) { 2976 prevcasep = ucasep; 2977 continue; 2978 } 2979 2980 if (prevcasep == NULL) 2981 Undiagablecaselist = Undiagablecaselist->next; 2982 else 2983 prevcasep->next = ucasep->next; 2984 2985 FREE(ucasep); 2986 return; 2987 } 2988 2989 for (fmep = FMElist; fmep; fmep = fmep->next) { 2990 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2991 break; 2992 prev = fmep; 2993 } 2994 2995 if (fmep == NULL) { 2996 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2997 fmd_case_uuid(hdl, fmcase)); 2998 return; 2999 } 3000 3001 if (EFMElist == fmep) 3002 EFMElist = prev; 3003 3004 if (prev == NULL) 3005 FMElist = FMElist->next; 3006 else 3007 prev->next = fmep->next; 3008 3009 fmep->next = NULL; 3010 3011 /* Get rid of any timer this fme has set */ 3012 if (fmep->wull != 0) 3013 fmd_timer_remove(fmep->hdl, fmep->timer); 3014 3015 if (ClosedFMEs == NULL) { 3016 ClosedFMEs = fmep; 3017 } else { 3018 fmep->next = ClosedFMEs; 3019 ClosedFMEs = fmep; 3020 } 3021 3022 Open_fme_count--; 3023 3024 /* See if we 
can close the overflow FME */ 3025 if (Open_fme_count <= Max_fme) { 3026 for (fmep = FMElist; fmep; fmep = fmep->next) { 3027 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 3028 fmep->fmcase))) 3029 break; 3030 } 3031 3032 if (fmep != NULL) 3033 fmd_case_close(fmep->hdl, fmep->fmcase); 3034 } 3035 } 3036 3037 /* 3038 * fme_set_timer() 3039 * If the time we need to wait for the given FME is less than the 3040 * current timer, kick that old timer out and establish a new one. 3041 */ 3042 static int 3043 fme_set_timer(struct fme *fmep, unsigned long long wull) 3044 { 3045 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 3046 ptree_timeval(O_ALTFP|O_VERB, &wull); 3047 3048 if (wull <= fmep->pull) { 3049 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 3050 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 3051 out(O_ALTFP|O_VERB, NULL); 3052 /* we've waited at least wull already, don't need timer */ 3053 return (0); 3054 } 3055 3056 out(O_ALTFP|O_VERB|O_NONL, " currently "); 3057 if (fmep->wull != 0) { 3058 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 3059 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 3060 out(O_ALTFP|O_VERB, NULL); 3061 } else { 3062 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 3063 out(O_ALTFP|O_VERB, NULL); 3064 } 3065 3066 if (fmep->wull != 0) 3067 if (wull >= fmep->wull) 3068 /* New timer would fire later than established timer */ 3069 return (0); 3070 3071 if (fmep->wull != 0) { 3072 fmd_timer_remove(fmep->hdl, fmep->timer); 3073 } 3074 3075 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 3076 fmep->e0r, wull); 3077 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 3078 fmep->wull = wull; 3079 return (1); 3080 } 3081 3082 void 3083 fme_timer_fired(struct fme *fmep, id_t tid) 3084 { 3085 struct fme *ffmep = NULL; 3086 3087 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 3088 if (ffmep == fmep) 3089 break; 3090 3091 if (ffmep == NULL) { 3092 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 3093 (void 
*)fmep); 3094 return; 3095 } 3096 3097 out(O_ALTFP|O_VERB, "Timer fired %lx", tid); 3098 fmep->pull = fmep->wull; 3099 fmep->wull = 0; 3100 fmd_buf_write(fmep->hdl, fmep->fmcase, 3101 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 3102 3103 fme_eval(fmep, fmep->e0r); 3104 } 3105 3106 /* 3107 * Preserve the fme's suspect list in its psuspects list, NULLing the 3108 * suspects list in the meantime. 3109 */ 3110 static void 3111 save_suspects(struct fme *fmep) 3112 { 3113 struct event *ep; 3114 struct event *nextep; 3115 3116 /* zero out the previous suspect list */ 3117 for (ep = fmep->psuspects; ep; ep = nextep) { 3118 nextep = ep->psuspects; 3119 ep->psuspects = NULL; 3120 } 3121 fmep->psuspects = NULL; 3122 3123 /* zero out the suspect list, copying it to previous suspect list */ 3124 fmep->psuspects = fmep->suspects; 3125 for (ep = fmep->suspects; ep; ep = nextep) { 3126 nextep = ep->suspects; 3127 ep->psuspects = ep->suspects; 3128 ep->suspects = NULL; 3129 ep->is_suspect = 0; 3130 } 3131 fmep->suspects = NULL; 3132 fmep->nsuspects = 0; 3133 fmep->nonfault = 0; 3134 } 3135 3136 /* 3137 * Retrieve the fme's suspect list from its psuspects list. 
 */
static void
restore_suspects(struct fme *fmep)
{
	struct event *ep;
	struct event *nextep;

	fmep->nsuspects = fmep->nonfault = 0;
	fmep->suspects = fmep->psuspects;
	for (ep = fmep->psuspects; ep; ep = nextep) {
		fmep->nsuspects++;
		if (!is_fault(ep->t))
			fmep->nonfault++;
		nextep = ep->psuspects;
		ep->suspects = ep->psuspects;
	}
}

/*
 * this is what we use to call the Emrys prototype code instead of main()
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
	struct event *ep;
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;

	save_suspects(fmep);

	out(O_ALTFP, "Evaluate FME %d", fmep->id);
	indent_set(" ");

	/* run the inference algorithm from the initial ereport e0 */
	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
	    fme_state2str(fmep->state));
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(O_ALTFP|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_NONL, ep);
	}
	out(O_ALTFP, NULL);

	switch (fmep->state) {
	case FME_CREDIBLE:
		print_suspects(SLNEW, fmep);
		(void) upsets_eval(fmep, ffep);

		/*
		 * we may have already posted suspects in upsets_eval() which
		 * can recurse into fme_eval() again. If so then just return.
		 */
		if (fmep->posted_suspects)
			return;

		publish_suspects(fmep);
		fmep->posted_suspects = 1;
		fmd_buf_write(fmep->hdl, fmep->fmcase,
		    WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));

		/*
		 * Now the suspects have been posted, we can clear up
		 * the instance tree as we won't be looking at it again.
		 * Also cancel the timer as the case is now solved.
		 */
		if (fmep->wull != 0) {
			fmd_timer_remove(fmep->hdl, fmep->timer);
			fmep->wull = 0;
		}
		break;

	case FME_WAIT:
		/* more ereports may arrive; set a timer and keep waiting */
		ASSERT(my_delay > fmep->ull);
		(void) fme_set_timer(fmep, my_delay);
		print_suspects(SLWAIT, fmep);
		itree_prune(fmep->eventtree);
		return;

	case FME_DISPROVED:
		print_suspects(SLDISPROVED, fmep);
		Undiag_reason = UD_UNSOLVD;
		fme_undiagnosable(fmep);
		break;
	}

	/* honor the Autoclose tunable once suspects have been posted */
	if (fmep->posted_suspects == 1 && Autoclose != NULL) {
		int doclose = 0;

		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0)
			doclose = 1;

		if (strcmp(Autoclose, "upsets") == 0) {
			doclose = 1;
			for (ep = fmep->suspects; ep; ep = ep->suspects) {
				if (ep->t != N_UPSET) {
					doclose = 0;
					break;
				}
			}
		}

		if (doclose) {
			out(O_ALTFP, "[closing FME%d, case %s (autoclose)]",
			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_close(fmep->hdl, fmep->fmcase);
		}
	}
	itree_free(fmep->eventtree);
	fmep->eventtree = NULL;
	structconfig_free(fmep->config);
	fmep->config = NULL;
	destroy_fme_bufs(fmep);
}

static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);

/*
 * checkconstraints -- evaluate the constraints on an arrow.
 *
 * Returns 1 if the constraints hold (or cannot yet be evaluated and are
 * therefore deferred), 0 if any constraint is known false.  Results that
 * can never change are cached in forever_true/forever_false.
 */
static int
checkconstraints(struct fme *fmep, struct arrow *arrowp)
{
	struct constraintlist *ctp;
	struct evalue value;
	char *sep = "";

	if (arrowp->forever_false) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (0);
	}
	if (arrowp->forever_true) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " Forever true constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (1);
	}

	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		if (eval_expr(ctp->cnode, NULL, NULL,
		    &fmep->globals, fmep->config,
		    arrowp, 0, &value)) {
			/* evaluation successful */
			if (value.t == UNDEFINED || value.v == 0) {
				/* known false */
				arrowp->forever_false = 1;
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " False constraint: ");
				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
				out(O_ALTFP|O_VERB, NULL);
				return (0);
			}
		} else {
			/* evaluation unsuccessful -- unknown value */
			indent();
			out(O_ALTFP|O_VERB|O_NONL,
			    " Deferred constraint: ");
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			out(O_ALTFP|O_VERB, NULL);
			return (1);
		}
	}
	/* known true */
	arrowp->forever_true = 1;
	indent();
	out(O_ALTFP|O_VERB|O_NONL, " True constraint: ");
	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		out(O_ALTFP|O_VERB|O_NONL, sep);
		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
		sep = ", ";
	}
	out(O_ALTFP|O_VERB, NULL);
	return (1);
}

/*
 * triggered -- return 1 if at least bp->nork arrows into ep's B_TO
 * bubbles carry the given mark (the K-count of the bubble is met).
 */
static int
triggered(struct fme *fmep, struct event *ep, int mark)
{
	struct bubble *bp;
	struct arrowlist *ap;
	int count = 0;

	stats_counter_bump(fmep->Tcallcount);
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			/* check count of marks against K in the bubble */
			if ((ap->arrowp->mark & mark) &&
			    ++count >= bp->nork)
				return (1);
		}
	}
	return (0);
}

/*
 * mark_arrows -- propagate effect marks downstream from ep.
 *
 * With mark == 0 this clears previously-set marks instead; with keep
 * set, events whose marks are cleared are flagged keep_in_tree.
 * Returns WAIT_EFFECT (with *pdelay set) if any downstream requirement
 * is still waiting, else 0.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
 */
			if (mark == 0) {
				if (ap->arrowp->arrow_marked == 0)
					continue;
				/* clear this arrow and recurse downstream */
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			ap->arrowp->arrow_marked = 1;
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/* constraints are evaluated against ep2's payload */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, " default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push(" E");
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    " EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    " PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push(" E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}

/*
 * effects_test -- check whether every observed ereport is a credible
 * effect of the hypothesized fault_event.
 *
 * Marks the downstream effects of fault_event, then scans the FME's
 * observations: an observation with no credible/waiting mark disproves
 * the hypothesis.  Returns FME_CREDIBLE, FME_WAIT (with *pdelay set),
 * or FME_DISPROVED; always clears the marks before returning.
 */
static enum fme_state
effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	struct event *error_event;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;

	stats_counter_bump(fmep->Ecallcount);
	indent_push(" E");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);

	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
	    &my_delay, 0) == WAIT_EFFECT) {
		return_value = FME_WAIT;
		if (overall_delay > my_delay)
			overall_delay = my_delay;
	}
	for (error_event = fmep->observations;
	    error_event; error_event = error_event->observations) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
			if (error_event->cached_state &
			    (PARENT_WAIT|WAIT_EFFECT)) {
				out(O_ALTFP|O_VERB, " NOT YET triggered");
				continue;
			}
			return_value = FME_DISPROVED;
			out(O_ALTFP|O_VERB, " NOT triggered");
			break;
		} else {
			out(O_ALTFP|O_VERB, " triggered");
		}
	}
	if (return_value == FME_DISPROVED) {
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
	} else {
		/* keep the marked subtree around for the final diagnosis */
		fault_event->keep_in_tree = 1;
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
	}

	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	if (return_value == FME_WAIT)
		*pdelay = overall_delay;
	return (return_value);
}
/*
 * requirements_test -- test whether the requirements of event ep are met
 *
 * An ereport's requirement is simply that it has been observed (count != 0);
 * if not yet observed, the result depends on whether the deadline
 * (at_latest_by) has already passed relative to fmep->pull.  For all other
 * events the B_FROM bubbles are descended and each bubble's N-of arrows are
 * tallied as credible/deferred/waiting via recursive calls.
 *
 * Results are memoized in ep->cached_state (REQMNTS_CREDIBLE,
 * REQMNTS_DISPROVED, REQMNTS_WAIT with ep->cached_delay) so repeated
 * traversals of the same event are cheap.  *pdelay is written only on the
 * FME_WAIT paths.  FME_DEFERRED results are deliberately NOT cached -- see
 * the comment near the end of the function.
 */
static enum fme_state
requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	int waiting_events;
	int credible_events;
	int deferred_events;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long arrow_delay;
	unsigned long long my_delay;
	struct event *ep2;
	struct bubble *bp;
	struct arrowlist *ap;

	/* fast paths: answer already cached from an earlier traversal */
	if (ep->cached_state & REQMNTS_CREDIBLE) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_CREDIBLE);
	}
	if (ep->cached_state & REQMNTS_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_DISPROVED);
	}
	if (ep->cached_state & REQMNTS_WAIT) {
		indent();
		*pdelay = ep->cached_delay;
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_WAIT);
	}
	stats_counter_bump(fmep->Rcallcount);
	indent_push("  R");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	if (ep->t == N_EREPORT) {
		if (ep->count == 0) {
			/*
			 * not seen yet; disproved if the deadline already
			 * passed, otherwise wait until at_latest_by.
			 */
			if (fmep->pull >= at_latest_by) {
				return_value = FME_DISPROVED;
			} else {
				ep->cached_delay = *pdelay = at_latest_by;
				return_value = FME_WAIT;
			}
		}

		indent();
		switch (return_value) {
		case FME_CREDIBLE:
			ep->cached_state |= REQMNTS_CREDIBLE;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_DISPROVED:
			ep->cached_state |= REQMNTS_DISPROVED;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_WAIT:
			ep->cached_state |= REQMNTS_WAIT;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB|O_NONL, " to ");
			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
			break;
		default:
			out(O_DIE, "requirements_test: unexpected fme_state");
			break;
		}
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();

		return (return_value);
	}

	/* this event is not a report, descend the tree */
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		int n;

		if (bp->t != B_FROM)
			continue;

		n = bp->nork;

		credible_events = 0;
		waiting_events = 0;
		deferred_events = 0;
		arrow_delay = TIMEVAL_EVENTUALLY;
		/*
		 * n is -1 for 'A' so adjust it.
		 * XXX just count up the arrows for now.
		 */
		if (n < 0) {
			n = 0;
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap))
				n++;
			indent();
			out(O_ALTFP|O_VERB, "  Bubble Counted N=%d", n);
		} else {
			indent();
			out(O_ALTFP|O_VERB, "  Bubble N=%d", n);
		}

		if (n == 0)
			continue;
		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap)) {
				ep2 = ap->arrowp->head->myevent;
				platform_set_payloadnvp(ep2->nvp);
				if (checkconstraints(fmep, ap->arrowp) == 0) {
					/*
					 * if any arrow is invalidated by the
					 * constraints, then we should elide the
					 * whole bubble to be consistent with
					 * the tree creation time behaviour
					 */
					bp->mark |= BUBBLE_ELIDED;
					platform_set_payloadnvp(NULL);
					break;
				}
				platform_set_payloadnvp(NULL);
			}
		}
		if (bp->mark & BUBBLE_ELIDED)
			continue;
		bp->mark |= BUBBLE_OK;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			ep2 = ap->arrowp->head->myevent;
			if (n <= credible_events)
				break;

			ap->arrowp->mark |= REQMNTS_COUNTER;
			if (triggered(fmep, ep2, REQMNTS_COUNTER))
				/* XXX adding max timevals! */
				switch (requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay)) {
				case FME_DEFERRED:
					deferred_events++;
					break;
				case FME_CREDIBLE:
					credible_events++;
					break;
				case FME_DISPROVED:
					break;
				case FME_WAIT:
					if (my_delay < arrow_delay)
						arrow_delay = my_delay;
					waiting_events++;
					break;
				default:
					out(O_DIE,
					    "Bug in requirements_test.");
				}
			else
				deferred_events++;
		}
		indent();
		out(O_ALTFP|O_VERB, "  Credible: %d Waiting %d",
		    credible_events + deferred_events, waiting_events);
		if (credible_events + deferred_events + waiting_events < n) {
			/* Can never meet requirements */
			ep->cached_state |= REQMNTS_DISPROVED;
			indent();
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB, NULL);
			indent_pop();
			return (FME_DISPROVED);
		}
		if (credible_events + deferred_events < n) {
			/* will have to wait */
			/* wait time is shortest known */
			if (arrow_delay < overall_delay)
				overall_delay = arrow_delay;
			return_value = FME_WAIT;
		} else if (credible_events < n) {
			if (return_value != FME_WAIT)
				return_value = FME_DEFERRED;
		}
	}

	/*
	 * don't mark as FME_DEFERRED. If this event isn't reached by another
	 * path, then this will be considered FME_CREDIBLE. But if it is
	 * reached by a different path so the K-count is met, then might
	 * get overridden by FME_WAIT or FME_DISPROVED.
	 */
	if (return_value == FME_WAIT) {
		ep->cached_state |= REQMNTS_WAIT;
		ep->cached_delay = *pdelay = overall_delay;
	} else if (return_value == FME_CREDIBLE) {
		ep->cached_state |= REQMNTS_CREDIBLE;
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (return_value);
}

/*
 * causes_test -- test whether at least K upstream causes of ep are credible
 *
 * Walks the B_TO bubbles at ep and hypothesises about the tail event of
 * each arrow that passes its constraints (each tail is visited at most
 * once, guarded by the CAUSES_TESTED flag).  Compares the number of
 * credible-or-waiting results against the bubble's K value: fewer means
 * FME_DISPROVED; any waiting result means FME_WAIT with the shortest
 * delay in *pdelay; otherwise FME_CREDIBLE.
 */
static enum fme_state
causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	int credible_results = 0;
	int waiting_results = 0;
	enum fme_state fstate;
	struct event *tail_event;
	struct bubble *bp;
	struct arrowlist *ap;
	int k = 1;

	stats_counter_bump(fmep->Ccallcount);
	indent_push("  C");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		k = bp->nork;	/* remember the K value */
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			int do_not_follow = 0;

			/*
			 * if we get to the same event multiple times
			 * only worry about the first one.
			 */
			if (ap->arrowp->tail->myevent->cached_state &
			    CAUSES_TESTED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  causes test already run for ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			/*
			 * see if false constraint prevents us
			 * from traversing this arrow
			 */
			platform_set_payloadnvp(ep->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0)
				do_not_follow = 1;
			platform_set_payloadnvp(NULL);
			if (do_not_follow) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  False arrow from ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			ap->arrowp->tail->myevent->cached_state |=
			    CAUSES_TESTED;
			tail_event = ap->arrowp->tail->myevent;
			fstate = hypothesise(fmep, tail_event, at_latest_by,
			    &my_delay);

			switch (fstate) {
			case FME_WAIT:
				if (my_delay < overall_delay)
					overall_delay = my_delay;
				waiting_results++;
				break;
			case FME_CREDIBLE:
				credible_results++;
				break;
			case FME_DISPROVED:
				break;
			default:
				out(O_DIE, "Bug in causes_test");
			}
		}
	}
	/* compare against K */
	if (credible_results + waiting_results < k) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_DISPROVED);
	}
	if (waiting_results != 0) {
		*pdelay = overall_delay;
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}

/*
 * hypothesise -- test whether event ep could be the cause of the reports
 *
 * Combines requirements_test() with, for problem events, effects_test()
 * (a credible problem is added to fmep->suspects unless we are only
 * peeking) or, for intermediate events, causes_test().  The result is
 * FME_DISPROVED if either leg disproves, FME_WAIT (shortest delay in
 * *pdelay) if either leg must wait, and FME_CREDIBLE otherwise.
 */
static enum fme_state
hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	enum fme_state rtr, otr;
	unsigned long long my_delay;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;

	stats_counter_bump(fmep->Hcallcount);
	indent_push("  H");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
		overall_delay = my_delay;
	if (rtr != FME_DISPROVED) {
		if (is_problem(ep->t)) {
			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
			if (otr != FME_DISPROVED) {
				/*
				 * a credible (or still-waiting) problem
				 * becomes a suspect, unless this is only a
				 * "peek" pass or it is one already.
				 */
				if (fmep->peek == 0 && ep->is_suspect == 0) {
					ep->suspects = fmep->suspects;
					ep->is_suspect = 1;
					fmep->suspects = ep;
					fmep->nsuspects++;
					if (!is_fault(ep->t))
						fmep->nonfault++;
				}
			}
		} else
			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
		if ((otr == FME_WAIT) && (my_delay < overall_delay))
			overall_delay = my_delay;
		if ((otr != FME_DISPROVED) &&
		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
			*pdelay = overall_delay;
	}
	/* note: otr is only examined below when rtr != FME_DISPROVED */
	if (rtr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if (otr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (causes are not credible)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}

/*
 * fme_istat_load -- reconstitute any persistent istats
 *
 * Reads the serialized istat buffer (WOBUF_ISTATS) from fmd, re-parses
 * each <class-name>@<path>\0<value>\0 entry into a temporary T_EVENT node,
 * replays the count with istat_bump(), and finally re-serializes via
 * istat_save().
 */
void
fme_istat_load(fmd_hdl_t *hdl)
{
	int sz;
	char *sbuf;
	char *ptr;

	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
		out(O_ALTFP, "fme_istat_load: No stats");
		return;
	}

	/* NOTE(review): alloca sized by fmd-supplied sz; presumed bounded */
	sbuf = alloca(sz);

	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);

	/*
	 * pick apart the serialized stats
	 *
	 * format is:
	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
	 * for example:
	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
	 *
	 * since this is parsing our own serialized data, any parsing issues
	 * are fatal, so we check for them all with ASSERT() below.
	 */
	ptr = sbuf;
	while (ptr < &sbuf[sz]) {
		char *sepptr;
		struct node *np;
		int val;

		sepptr = strchr(ptr, '@');
		ASSERT(sepptr != NULL);
		*sepptr = '\0';

		/* construct the event */
		np = newnode(T_EVENT, NULL, 0);
		np->u.event.ename = newnode(T_NAME, NULL, 0);
		np->u.event.ename->u.name.t = N_STAT;
		np->u.event.ename->u.name.s = stable(ptr);
		np->u.event.ename->u.name.it = IT_ENAME;
		np->u.event.ename->u.name.last = np->u.event.ename;

		ptr = sepptr + 1;
		ASSERT(ptr < &sbuf[sz]);
		ptr += strlen(ptr);
		ptr++;	/* move past the '\0' separating path from value */
		ASSERT(ptr < &sbuf[sz]);
		ASSERT(isdigit(*ptr));
		val = atoi(ptr);
		ASSERT(val > 0);
		ptr += strlen(ptr);
		ptr++;	/* move past the final '\0' for this entry */

		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
		ASSERT(np->u.event.epname != NULL);

		istat_bump(np, val);
		tree_free(np);
	}

	istat_save();
}