1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 #include "esclex.h" 58 59 /* imported from eft.c... 
 */
extern char *Autoclose;
extern hrtime_t Hesitate;
extern char *Serd_Override;
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/*
 * Serd_need_save is set by serd_eval() just before it calls serd_save(),
 * after a new entry has been added to SerdEngines.
 * NOTE(review): Istat_need_save presumably plays the same role for
 * istat_save() -- its users are not visible in this part of the file.
 */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code for the most recent failure to diagnose or restart a case.
 * fme_restart() attaches it to the undiagnosable defect it publishes.
 */
static const char *Undiag_reason;

/*
 * Next FME id to hand out.  newfme() post-increments it; fme_restart()
 * bumps it past any id it restores from a saved case.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */

/* list of fault management exercises underway */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
/*
 * FMElist is the head and EFMElist the tail of the list fme_ready()
 * appends to.  ClosedFMEs is a separate list that fme_fini() also tears
 * down.
 */

/*
 * Cases that fme_restart() was unable to reconstitute; new entries are
 * pushed on the front of the list.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;

static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);

/*
 * alloc_fme -- allocate a struct fme and zero-fill it, so every pointer
 * member starts out NULL and every counter starts out zero.
 */
static struct fme *
alloc_fme(void)
{
	struct fme *fmep;

	fmep = MALLOC(sizeof (*fmep));
	bzero(fmep, sizeof (*fmep));
	return (fmep);
}

/*
 * fme_ready -- called when all initialization of the FME (except for
 * stats) has completed successfully.  Adds the fme to global lists
 * and establishes its stats.
178 */ 179 static struct fme * 180 fme_ready(struct fme *fmep) 181 { 182 char nbuf[100]; 183 184 Nfmep = NULL; /* don't need to free this on module abort now */ 185 186 if (EFMElist) { 187 EFMElist->next = fmep; 188 EFMElist = fmep; 189 } else 190 FMElist = EFMElist = fmep; 191 192 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 193 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 194 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 195 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 196 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 197 fmep->Rcallcount = stats_new_counter(nbuf, 198 "calls to requirements_test()", 1); 199 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 200 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 201 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 202 fmep->Ecallcount = 203 stats_new_counter(nbuf, "calls to effects_test()", 1); 204 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 205 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 206 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 207 fmep->Marrowcount = stats_new_counter(nbuf, 208 "arrows marked by mark_arrows()", 1); 209 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 210 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 211 212 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 213 config_print(O_ALTFP|O_VERB2, fmep->config); 214 215 return (fmep); 216 } 217 218 extern void ipath_dummy_lut(struct arrow *); 219 extern struct lut *itree_create_dummy(const char *, const struct ipath *); 220 221 /* ARGSUSED */ 222 static void 223 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 224 { 225 struct bubble *bp; 226 struct arrowlist *ap; 227 228 for (bp = itree_next_bubble(ep, NULL); bp; 229 bp = itree_next_bubble(ep, bp)) { 230 if (bp->t != B_FROM) 231 continue; 232 for (ap = itree_next_arrow(bp, NULL); ap; 233 ap = itree_next_arrow(bp, ap)) { 234 
ap->arrowp->pnode->u.arrow.needed = 1; 235 ipath_dummy_lut(ap->arrowp); 236 } 237 } 238 } 239 240 /* ARGSUSED */ 241 static void 242 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 243 { 244 struct bubble *bp; 245 struct arrowlist *ap; 246 247 for (bp = itree_next_bubble(ep, NULL); bp; 248 bp = itree_next_bubble(ep, bp)) { 249 if (bp->t != B_FROM) 250 continue; 251 for (ap = itree_next_arrow(bp, NULL); ap; 252 ap = itree_next_arrow(bp, ap)) 253 ap->arrowp->pnode->u.arrow.needed = 0; 254 } 255 } 256 257 static void globals_destructor(void *left, void *right, void *arg); 258 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep); 259 260 static void 261 prune_propagations(const char *e0class, const struct ipath *e0ipp) 262 { 263 char nbuf[100]; 264 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 265 extern struct lut *Usednames; 266 267 Nfmep = alloc_fme(); 268 Nfmep->id = Nextid; 269 Nfmep->state = FME_NOTHING; 270 Nfmep->eventtree = itree_create_dummy(e0class, e0ipp); 271 if ((Nfmep->e0 = 272 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 273 out(O_ALTFP, "prune_propagations: e0 not in instance tree"); 274 itree_free(Nfmep->eventtree); 275 FREE(Nfmep); 276 Nfmep = NULL; 277 return; 278 } 279 Nfmep->ecurrent = Nfmep->observations = Nfmep->e0; 280 Nfmep->e0->count++; 281 282 (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id); 283 Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 284 (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id); 285 Nfmep->Hcallcount = 286 stats_new_counter(nbuf, "calls to hypothesise()", 1); 287 (void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id); 288 Nfmep->Rcallcount = stats_new_counter(nbuf, 289 "calls to requirements_test()", 1); 290 (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id); 291 Nfmep->Ccallcount = 292 stats_new_counter(nbuf, "calls to causes_test()", 1); 293 (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id); 294 Nfmep->Ecallcount = 295 stats_new_counter(nbuf, "calls to 
effects_test()", 1); 296 (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id); 297 Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 298 (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id); 299 Nfmep->Marrowcount = stats_new_counter(nbuf, 300 "arrows marked by mark_arrows()", 1); 301 (void) sprintf(nbuf, "fme%d.diags", Nfmep->id); 302 Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 303 304 Nfmep->peek = 1; 305 lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep); 306 lut_free(Usednames, NULL, NULL); 307 Usednames = NULL; 308 lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep); 309 (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay); 310 itree_prune(Nfmep->eventtree); 311 lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep); 312 313 stats_delete(Nfmep->Rcount); 314 stats_delete(Nfmep->Hcallcount); 315 stats_delete(Nfmep->Rcallcount); 316 stats_delete(Nfmep->Ccallcount); 317 stats_delete(Nfmep->Ecallcount); 318 stats_delete(Nfmep->Tcallcount); 319 stats_delete(Nfmep->Marrowcount); 320 stats_delete(Nfmep->diags); 321 itree_free(Nfmep->eventtree); 322 lut_free(Nfmep->globals, globals_destructor, NULL); 323 FREE(Nfmep); 324 } 325 326 static struct fme * 327 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl, 328 fmd_case_t *fmcase) 329 { 330 struct cfgdata *cfgdata; 331 int init_size; 332 extern int alloc_total(); 333 334 init_size = alloc_total(); 335 out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size); 336 if ((cfgdata = config_snapshot()) == NULL) { 337 out(O_ALTFP, "newfme: NULL configuration"); 338 Undiag_reason = UD_NOCONF; 339 return (NULL); 340 } 341 platform_save_config(hdl, fmcase); 342 out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes", 343 alloc_total() - init_size); 344 345 Nfmep = alloc_fme(); 346 347 Nfmep->id = Nextid++; 348 Nfmep->config = cfgdata->cooked; 349 config_free(cfgdata); 350 Nfmep->posted_suspects = 0; 351 
Nfmep->uniqobs = 0; 352 Nfmep->state = FME_NOTHING; 353 Nfmep->pull = 0ULL; 354 Nfmep->overflow = 0; 355 356 Nfmep->fmcase = fmcase; 357 Nfmep->hdl = hdl; 358 359 if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) { 360 out(O_ALTFP, "newfme: NULL instance tree"); 361 Undiag_reason = UD_INSTFAIL; 362 structconfig_free(Nfmep->config); 363 destroy_fme_bufs(Nfmep); 364 FREE(Nfmep); 365 Nfmep = NULL; 366 return (NULL); 367 } 368 369 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 370 371 if ((Nfmep->e0 = 372 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 373 out(O_ALTFP, "newfme: e0 not in instance tree"); 374 Undiag_reason = UD_BADEVENTI; 375 itree_free(Nfmep->eventtree); 376 structconfig_free(Nfmep->config); 377 destroy_fme_bufs(Nfmep); 378 FREE(Nfmep); 379 Nfmep = NULL; 380 return (NULL); 381 } 382 383 return (fme_ready(Nfmep)); 384 } 385 386 void 387 fme_fini(void) 388 { 389 struct fme *sfp, *fp; 390 struct case_list *ucasep, *nextcasep; 391 392 ucasep = Undiagablecaselist; 393 while (ucasep != NULL) { 394 nextcasep = ucasep->next; 395 FREE(ucasep); 396 ucasep = nextcasep; 397 } 398 Undiagablecaselist = NULL; 399 400 /* clean up closed fmes */ 401 fp = ClosedFMEs; 402 while (fp != NULL) { 403 sfp = fp->next; 404 destroy_fme(fp); 405 fp = sfp; 406 } 407 ClosedFMEs = NULL; 408 409 fp = FMElist; 410 while (fp != NULL) { 411 sfp = fp->next; 412 destroy_fme(fp); 413 fp = sfp; 414 } 415 FMElist = EFMElist = NULL; 416 417 /* if we were in the middle of creating an fme, free it now */ 418 if (Nfmep) { 419 destroy_fme(Nfmep); 420 Nfmep = NULL; 421 } 422 } 423 424 /* 425 * Allocated space for a buffer name. 20 bytes allows for 426 * a ridiculous 9,999,999 unique observations. 427 */ 428 #define OBBUFNMSZ 20 429 430 /* 431 * serialize_observation 432 * 433 * Create a recoverable version of the current observation 434 * (f->ecurrent). 
We keep a serialized version of each unique 435 * observation in order that we may resume correctly the fme in the 436 * correct state if eft or fmd crashes and we're restarted. 437 */ 438 static void 439 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 440 { 441 size_t pkdlen; 442 char tmpbuf[OBBUFNMSZ]; 443 char *pkd = NULL; 444 char *estr; 445 446 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 447 estr = ipath2str(cls, ipp); 448 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 449 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 450 strlen(estr) + 1); 451 FREE(estr); 452 453 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 454 (void) snprintf(tmpbuf, 455 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 456 if (nvlist_xpack(fp->ecurrent->nvp, 457 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 458 out(O_DIE|O_SYS, "pack of observed nvl failed"); 459 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 460 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 461 FREE(pkd); 462 } 463 464 fp->uniqobs++; 465 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 466 sizeof (fp->uniqobs)); 467 } 468 469 /* 470 * init_fme_bufs -- We keep several bits of state about an fme for 471 * use if eft or fmd crashes and we're restarted. 
472 */ 473 static void 474 init_fme_bufs(struct fme *fp) 475 { 476 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 477 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 478 sizeof (fp->pull)); 479 480 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 481 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 482 sizeof (fp->id)); 483 484 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 485 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 486 sizeof (fp->uniqobs)); 487 488 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 489 sizeof (fp->posted_suspects)); 490 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 491 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 492 } 493 494 static void 495 destroy_fme_bufs(struct fme *fp) 496 { 497 char tmpbuf[OBBUFNMSZ]; 498 int o; 499 500 platform_restore_config(fp->hdl, fp->fmcase); 501 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 502 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 503 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 504 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 505 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 506 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 507 508 for (o = 0; o < fp->uniqobs; o++) { 509 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 510 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 511 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 512 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 513 } 514 } 515 516 /* 517 * reconstitute_observations -- convert a case's serialized observations 518 * back into struct events. Returns zero if all observations are 519 * successfully reconstituted. 
520 */ 521 static int 522 reconstitute_observations(struct fme *fmep) 523 { 524 struct event *ep; 525 struct node *epnamenp = NULL; 526 size_t pkdlen; 527 char *pkd = NULL; 528 char *tmpbuf = alloca(OBBUFNMSZ); 529 char *sepptr; 530 char *estr; 531 int ocnt; 532 int elen; 533 534 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 535 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 536 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 537 if (elen == 0) { 538 out(O_ALTFP, 539 "reconstitute_observation: no %s buffer found.", 540 tmpbuf); 541 Undiag_reason = UD_MISSINGOBS; 542 break; 543 } 544 545 estr = MALLOC(elen); 546 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 547 sepptr = strchr(estr, '@'); 548 if (sepptr == NULL) { 549 out(O_ALTFP, 550 "reconstitute_observation: %s: " 551 "missing @ separator in %s.", 552 tmpbuf, estr); 553 Undiag_reason = UD_MISSINGPATH; 554 FREE(estr); 555 break; 556 } 557 558 *sepptr = '\0'; 559 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 560 out(O_ALTFP, 561 "reconstitute_observation: %s: " 562 "trouble converting path string \"%s\" " 563 "to internal representation.", 564 tmpbuf, sepptr + 1); 565 Undiag_reason = UD_MISSINGPATH; 566 FREE(estr); 567 break; 568 } 569 570 /* construct the event */ 571 ep = itree_lookup(fmep->eventtree, 572 stable(estr), ipath(epnamenp)); 573 if (ep == NULL) { 574 out(O_ALTFP, 575 "reconstitute_observation: %s: " 576 "lookup of \"%s\" in itree failed.", 577 tmpbuf, ipath2str(estr, ipath(epnamenp))); 578 Undiag_reason = UD_BADOBS; 579 tree_free(epnamenp); 580 FREE(estr); 581 break; 582 } 583 tree_free(epnamenp); 584 585 /* 586 * We may or may not have a saved nvlist for the observation 587 */ 588 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 589 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 590 if (pkdlen != 0) { 591 pkd = MALLOC(pkdlen); 592 fmd_buf_read(fmep->hdl, 593 fmep->fmcase, tmpbuf, pkd, pkdlen); 594 ASSERT(ep->nvp == NULL); 595 if 
(nvlist_xunpack(pkd, 596 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 597 out(O_DIE|O_SYS, "pack of observed nvl failed"); 598 FREE(pkd); 599 } 600 601 if (ocnt == 0) 602 fmep->e0 = ep; 603 604 FREE(estr); 605 fmep->ecurrent = ep; 606 ep->count++; 607 608 /* link it into list of observations seen */ 609 ep->observations = fmep->observations; 610 fmep->observations = ep; 611 } 612 613 if (ocnt == fmep->uniqobs) { 614 (void) fme_ready(fmep); 615 return (0); 616 } 617 618 return (1); 619 } 620 621 /* 622 * restart_fme -- called during eft initialization. Reconstitutes 623 * an in-progress fme. 624 */ 625 void 626 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 627 { 628 nvlist_t *defect; 629 struct case_list *bad; 630 struct fme *fmep; 631 struct cfgdata *cfgdata; 632 size_t rawsz; 633 struct event *ep; 634 char *tmpbuf = alloca(OBBUFNMSZ); 635 char *sepptr; 636 char *estr; 637 int elen; 638 struct node *epnamenp = NULL; 639 int init_size; 640 extern int alloc_total(); 641 642 /* 643 * ignore solved or closed cases 644 */ 645 if (fmd_case_solved(hdl, inprogress) || 646 fmd_case_closed(hdl, inprogress)) 647 return; 648 649 fmep = alloc_fme(); 650 fmep->fmcase = inprogress; 651 fmep->hdl = hdl; 652 653 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 654 out(O_ALTFP, "restart_fme: no saved posted status"); 655 Undiag_reason = UD_MISSINGINFO; 656 goto badcase; 657 } else { 658 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 659 (void *)&fmep->posted_suspects, 660 sizeof (fmep->posted_suspects)); 661 } 662 663 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 664 out(O_ALTFP, "restart_fme: no saved id"); 665 Undiag_reason = UD_MISSINGINFO; 666 goto badcase; 667 } else { 668 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 669 sizeof (fmep->id)); 670 } 671 if (Nextid <= fmep->id) 672 Nextid = fmep->id + 1; 673 674 out(O_ALTFP, "Replay FME %d", fmep->id); 675 676 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 677 out(O_ALTFP, "restart_fme: No 
config data"); 678 Undiag_reason = UD_MISSINGINFO; 679 goto badcase; 680 } 681 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 682 sizeof (size_t)); 683 684 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 685 out(O_ALTFP, "restart_fme: No event zero"); 686 Undiag_reason = UD_MISSINGZERO; 687 goto badcase; 688 } 689 690 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 691 out(O_ALTFP, "restart_fme: no saved wait time"); 692 Undiag_reason = UD_MISSINGINFO; 693 goto badcase; 694 } else { 695 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 696 sizeof (fmep->pull)); 697 } 698 699 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 700 out(O_ALTFP, "restart_fme: no count of observations"); 701 Undiag_reason = UD_MISSINGINFO; 702 goto badcase; 703 } else { 704 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 705 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 706 } 707 708 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0"); 709 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 710 if (elen == 0) { 711 out(O_ALTFP, "reconstitute_observation: no %s buffer found.", 712 tmpbuf); 713 Undiag_reason = UD_MISSINGOBS; 714 goto badcase; 715 } 716 estr = MALLOC(elen); 717 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 718 sepptr = strchr(estr, '@'); 719 if (sepptr == NULL) { 720 out(O_ALTFP, "reconstitute_observation: %s: " 721 "missing @ separator in %s.", 722 tmpbuf, estr); 723 Undiag_reason = UD_MISSINGPATH; 724 FREE(estr); 725 goto badcase; 726 } 727 *sepptr = '\0'; 728 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 729 out(O_ALTFP, "reconstitute_observation: %s: " 730 "trouble converting path string \"%s\" " 731 "to internal representation.", tmpbuf, sepptr + 1); 732 Undiag_reason = UD_MISSINGPATH; 733 FREE(estr); 734 goto badcase; 735 } 736 prune_propagations(stable(estr), ipath(epnamenp)); 737 tree_free(epnamenp); 738 FREE(estr); 739 740 init_size = alloc_total(); 741 out(O_ALTFP|O_STAMP, "start 
config_restore using %d bytes", init_size); 742 cfgdata = MALLOC(sizeof (struct cfgdata)); 743 cfgdata->cooked = NULL; 744 cfgdata->devcache = NULL; 745 cfgdata->devidcache = NULL; 746 cfgdata->cpucache = NULL; 747 cfgdata->raw_refcnt = 1; 748 749 if (rawsz > 0) { 750 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 751 out(O_ALTFP, "restart_fme: Config data size mismatch"); 752 Undiag_reason = UD_CFGMISMATCH; 753 goto badcase; 754 } 755 cfgdata->begin = MALLOC(rawsz); 756 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 757 fmd_buf_read(hdl, 758 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 759 } else { 760 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 761 } 762 763 config_cook(cfgdata); 764 fmep->config = cfgdata->cooked; 765 config_free(cfgdata); 766 out(O_ALTFP|O_STAMP, "config_restore added %d bytes", 767 alloc_total() - init_size); 768 769 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) { 770 /* case not properly saved or irretrievable */ 771 out(O_ALTFP, "restart_fme: NULL instance tree"); 772 Undiag_reason = UD_INSTFAIL; 773 goto badcase; 774 } 775 776 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 777 778 if (reconstitute_observations(fmep) != 0) 779 goto badcase; 780 781 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id); 782 for (ep = fmep->observations; ep; ep = ep->observations) { 783 out(O_ALTFP|O_NONL, " "); 784 itree_pevent_brief(O_ALTFP|O_NONL, ep); 785 } 786 out(O_ALTFP, NULL); 787 788 Open_fme_count++; 789 790 /* give the diagnosis algorithm a shot at the new FME state */ 791 fme_eval(fmep, fmep->e0r); 792 return; 793 794 badcase: 795 if (fmep->eventtree != NULL) 796 itree_free(fmep->eventtree); 797 if (fmep->config) 798 structconfig_free(fmep->config); 799 destroy_fme_bufs(fmep); 800 FREE(fmep); 801 802 /* 803 * Since we're unable to restart the case, add it to the undiagable 804 * list and solve and close it as appropriate. 
805 */ 806 bad = MALLOC(sizeof (struct case_list)); 807 bad->next = NULL; 808 809 if (Undiagablecaselist != NULL) 810 bad->next = Undiagablecaselist; 811 Undiagablecaselist = bad; 812 bad->fmcase = inprogress; 813 814 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ", 815 fmd_case_uuid(hdl, bad->fmcase)); 816 817 if (fmd_case_solved(hdl, bad->fmcase)) { 818 out(O_ALTFP|O_NONL, "already solved, "); 819 } else { 820 out(O_ALTFP|O_NONL, "solving, "); 821 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 822 NULL, NULL, NULL); 823 if (Undiag_reason != NULL) 824 (void) nvlist_add_string(defect, 825 UNDIAG_REASON, Undiag_reason); 826 fmd_case_add_suspect(hdl, bad->fmcase, defect); 827 fmd_case_solve(hdl, bad->fmcase); 828 } 829 830 if (fmd_case_closed(hdl, bad->fmcase)) { 831 out(O_ALTFP, "already closed ]"); 832 } else { 833 out(O_ALTFP, "closing ]"); 834 fmd_case_close(hdl, bad->fmcase); 835 } 836 } 837 838 /*ARGSUSED*/ 839 static void 840 globals_destructor(void *left, void *right, void *arg) 841 { 842 struct evalue *evp = (struct evalue *)right; 843 if (evp->t == NODEPTR) 844 tree_free((struct node *)(uintptr_t)evp->v); 845 evp->v = (uintptr_t)NULL; 846 FREE(evp); 847 } 848 849 void 850 destroy_fme(struct fme *f) 851 { 852 stats_delete(f->Rcount); 853 stats_delete(f->Hcallcount); 854 stats_delete(f->Rcallcount); 855 stats_delete(f->Ccallcount); 856 stats_delete(f->Ecallcount); 857 stats_delete(f->Tcallcount); 858 stats_delete(f->Marrowcount); 859 stats_delete(f->diags); 860 861 if (f->eventtree != NULL) 862 itree_free(f->eventtree); 863 if (f->config) 864 structconfig_free(f->config); 865 lut_free(f->globals, globals_destructor, NULL); 866 FREE(f); 867 } 868 869 static const char * 870 fme_state2str(enum fme_state s) 871 { 872 switch (s) { 873 case FME_NOTHING: return ("NOTHING"); 874 case FME_WAIT: return ("WAIT"); 875 case FME_CREDIBLE: return ("CREDIBLE"); 876 case FME_DISPROVED: return ("DISPROVED"); 877 case FME_DEFERRED: return ("DEFERRED"); 878 
default: return ("UNKNOWN"); 879 } 880 } 881 882 static int 883 is_problem(enum nametype t) 884 { 885 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 886 } 887 888 static int 889 is_fault(enum nametype t) 890 { 891 return (t == N_FAULT); 892 } 893 894 static int 895 is_defect(enum nametype t) 896 { 897 return (t == N_DEFECT); 898 } 899 900 static int 901 is_upset(enum nametype t) 902 { 903 return (t == N_UPSET); 904 } 905 906 static void 907 fme_print(int flags, struct fme *fmep) 908 { 909 struct event *ep; 910 911 out(flags, "Fault Management Exercise %d", fmep->id); 912 out(flags, "\t State: %s", fme_state2str(fmep->state)); 913 out(flags|O_NONL, "\t Start time: "); 914 ptree_timeval(flags|O_NONL, &fmep->ull); 915 out(flags, NULL); 916 if (fmep->wull) { 917 out(flags|O_NONL, "\t Wait time: "); 918 ptree_timeval(flags|O_NONL, &fmep->wull); 919 out(flags, NULL); 920 } 921 out(flags|O_NONL, "\t E0: "); 922 if (fmep->e0) 923 itree_pevent_brief(flags|O_NONL, fmep->e0); 924 else 925 out(flags|O_NONL, "NULL"); 926 out(flags, NULL); 927 out(flags|O_NONL, "\tObservations:"); 928 for (ep = fmep->observations; ep; ep = ep->observations) { 929 out(flags|O_NONL, " "); 930 itree_pevent_brief(flags|O_NONL, ep); 931 } 932 out(flags, NULL); 933 out(flags|O_NONL, "\tSuspect list:"); 934 for (ep = fmep->suspects; ep; ep = ep->suspects) { 935 out(flags|O_NONL, " "); 936 itree_pevent_brief(flags|O_NONL, ep); 937 } 938 out(flags, NULL); 939 if (fmep->eventtree != NULL) { 940 out(flags|O_VERB2, "\t Tree:"); 941 itree_ptree(flags|O_VERB2, fmep->eventtree); 942 } 943 } 944 945 static struct node * 946 pathstring2epnamenp(char *path) 947 { 948 char *sep = "/"; 949 struct node *ret; 950 char *ptr; 951 952 if ((ptr = strtok(path, sep)) == NULL) 953 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 954 955 ret = tree_iname(stable(ptr), NULL, 0); 956 957 while ((ptr = strtok(NULL, sep)) != NULL) 958 ret = tree_name_append(ret, 959 tree_iname(stable(ptr), NULL, 0)); 960 961 
return (ret); 962 } 963 964 /* 965 * for a given upset sp, increment the corresponding SERD engine. if the 966 * SERD engine trips, return the ename and ipp of the resulting ereport. 967 * returns true if engine tripped and *enamep and *ippp were filled in. 968 */ 969 static int 970 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 971 fmd_case_t *fmcase, struct event *sp, const char **enamep, 972 const struct ipath **ippp) 973 { 974 struct node *serdinst; 975 char *serdname; 976 struct node *nid; 977 struct serd_entry *newentp; 978 979 ASSERT(sp->t == N_UPSET); 980 ASSERT(ffep != NULL); 981 982 /* 983 * obtain instanced SERD engine from the upset sp. from this 984 * derive serdname, the string used to identify the SERD engine. 985 */ 986 serdinst = eventprop_lookup(sp, L_engine); 987 988 if (serdinst == NULL) 989 return (0); 990 991 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 992 ipath(serdinst->u.stmt.np->u.event.epname)); 993 994 /* handle serd engine "id" property, if there is one */ 995 if ((nid = 996 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 997 struct evalue *gval; 998 char suffixbuf[200]; 999 char *suffix; 1000 char *nserdname; 1001 size_t nname; 1002 1003 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 1004 ptree_name_iter(O_ALTFP|O_NONL, nid); 1005 1006 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 1007 1008 if ((gval = lut_lookup(fmep->globals, 1009 (void *)nid->u.globid.s, NULL)) == NULL) { 1010 out(O_ALTFP, " undefined"); 1011 } else if (gval->t == UINT64) { 1012 out(O_ALTFP, " %llu", gval->v); 1013 (void) sprintf(suffixbuf, "%llu", gval->v); 1014 suffix = suffixbuf; 1015 } else { 1016 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 1017 suffix = (char *)(uintptr_t)gval->v; 1018 } 1019 1020 nname = strlen(serdname) + strlen(suffix) + 2; 1021 nserdname = MALLOC(nname); 1022 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 1023 FREE(serdname); 1024 serdname = 
nserdname; 1025 } 1026 1027 if (!fmd_serd_exists(hdl, serdname)) { 1028 struct node *nN, *nT; 1029 const char *s; 1030 struct node *nodep; 1031 struct config *cp; 1032 char *path; 1033 uint_t nval; 1034 hrtime_t tval; 1035 const char *name; 1036 char *serd_name; 1037 int i; 1038 char *ptr; 1039 int got_n_override = 0, got_t_override = 0; 1040 1041 /* no SERD engine yet, so create it */ 1042 nodep = serdinst->u.stmt.np->u.event.epname; 1043 name = serdinst->u.stmt.np->u.event.ename->u.name.s; 1044 path = ipath2str(NULL, ipath(nodep)); 1045 cp = config_lookup(fmep->config, path, 0); 1046 FREE((void *)path); 1047 1048 /* 1049 * We allow serd paramaters to be overridden, either from 1050 * eft.conf file values (if Serd_Override is set) or from 1051 * driver properties (for "serd.io.device" engines). 1052 */ 1053 if (Serd_Override != NULL) { 1054 char *save_ptr, *ptr1, *ptr2, *ptr3; 1055 ptr3 = save_ptr = STRDUP(Serd_Override); 1056 while (*ptr3 != '\0') { 1057 ptr1 = strchr(ptr3, ','); 1058 *ptr1 = '\0'; 1059 if (strcmp(ptr3, name) == 0) { 1060 ptr2 = strchr(ptr1 + 1, ','); 1061 *ptr2 = '\0'; 1062 nval = atoi(ptr1 + 1); 1063 out(O_ALTFP, "serd override %s_n %d", 1064 name, nval); 1065 ptr3 = strchr(ptr2 + 1, ' '); 1066 if (ptr3) 1067 *ptr3 = '\0'; 1068 ptr = STRDUP(ptr2 + 1); 1069 out(O_ALTFP, "serd override %s_t %s", 1070 name, ptr); 1071 got_n_override = 1; 1072 got_t_override = 1; 1073 break; 1074 } else { 1075 ptr2 = strchr(ptr1 + 1, ','); 1076 ptr3 = strchr(ptr2 + 1, ' '); 1077 if (ptr3 == NULL) 1078 break; 1079 } 1080 ptr3++; 1081 } 1082 FREE(save_ptr); 1083 } 1084 1085 if (cp && got_n_override == 0) { 1086 /* 1087 * convert serd engine name into property name 1088 */ 1089 serd_name = MALLOC(strlen(name) + 3); 1090 for (i = 0; i < strlen(name); i++) { 1091 if (name[i] == '.') 1092 serd_name[i] = '_'; 1093 else 1094 serd_name[i] = name[i]; 1095 } 1096 serd_name[i++] = '_'; 1097 serd_name[i++] = 'n'; 1098 serd_name[i] = '\0'; 1099 if (s = config_getprop(cp, 
serd_name)) { 1100 nval = atoi(s); 1101 out(O_ALTFP, "serd override %s_n %s", name, s); 1102 got_n_override = 1; 1103 } 1104 serd_name[i - 1] = 't'; 1105 if (s = config_getprop(cp, serd_name)) { 1106 ptr = STRDUP(s); 1107 out(O_ALTFP, "serd override %s_t %s", name, s); 1108 got_t_override = 1; 1109 } 1110 FREE(serd_name); 1111 } 1112 1113 if (!got_n_override) { 1114 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, 1115 NULL); 1116 ASSERT(nN->t == T_NUM); 1117 nval = (uint_t)nN->u.ull; 1118 } 1119 if (!got_t_override) { 1120 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, 1121 NULL); 1122 ASSERT(nT->t == T_TIMEVAL); 1123 tval = (hrtime_t)nT->u.ull; 1124 } else { 1125 const unsigned long long *ullp; 1126 const char *suffix; 1127 int len; 1128 1129 len = strspn(ptr, "0123456789"); 1130 suffix = stable(&ptr[len]); 1131 ullp = (unsigned long long *)lut_lookup(Timesuffixlut, 1132 (void *)suffix, NULL); 1133 ptr[len] = '\0'; 1134 tval = (unsigned long long)strtoul(ptr, NULL, 0) * 1135 (ullp ? *ullp : 1ll); 1136 FREE(ptr); 1137 } 1138 fmd_serd_create(hdl, serdname, nval, tval); 1139 } 1140 1141 newentp = MALLOC(sizeof (*newentp)); 1142 newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s); 1143 newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname); 1144 newentp->hdl = hdl; 1145 if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) { 1146 SerdEngines = lut_add(SerdEngines, (void *)newentp, 1147 (void *)newentp, (lut_cmp)serd_cmp); 1148 Serd_need_save = 1; 1149 serd_save(); 1150 } else { 1151 FREE(newentp); 1152 } 1153 1154 1155 /* 1156 * increment SERD engine. 
if engine fires, reset serd 1157 * engine and return trip_strcode 1158 */ 1159 if (fmd_serd_record(hdl, serdname, ffep)) { 1160 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 1161 (void *)L_trip, NULL); 1162 1163 ASSERT(tripinst != NULL); 1164 1165 *enamep = tripinst->u.event.ename->u.name.s; 1166 *ippp = ipath(tripinst->u.event.epname); 1167 1168 fmd_case_add_serd(hdl, fmcase, serdname); 1169 fmd_serd_reset(hdl, serdname); 1170 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 1171 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 1172 out(O_ALTFP, "]"); 1173 1174 FREE(serdname); 1175 return (1); 1176 } 1177 1178 FREE(serdname); 1179 return (0); 1180 } 1181 1182 /* 1183 * search a suspect list for upsets. feed each upset to serd_eval() and 1184 * build up tripped[], an array of ereports produced by the firing of 1185 * any SERD engines. then feed each ereport back into 1186 * fme_receive_report(). 1187 * 1188 * returns ntrip, the number of these ereports produced. 1189 */ 1190 static int 1191 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 1192 { 1193 /* we build an array of tripped ereports that we send ourselves */ 1194 struct { 1195 const char *ename; 1196 const struct ipath *ipp; 1197 } *tripped; 1198 struct event *sp; 1199 int ntrip, nupset, i; 1200 1201 /* 1202 * count the number of upsets to determine the upper limit on 1203 * expected trip ereport strings. remember that one upset can 1204 * lead to at most one ereport. 
1205 */ 1206 nupset = 0; 1207 for (sp = fmep->suspects; sp; sp = sp->suspects) { 1208 if (sp->t == N_UPSET) 1209 nupset++; 1210 } 1211 1212 if (nupset == 0) 1213 return (0); 1214 1215 /* 1216 * get to this point if we have upsets and expect some trip 1217 * ereports 1218 */ 1219 tripped = alloca(sizeof (*tripped) * nupset); 1220 bzero((void *)tripped, sizeof (*tripped) * nupset); 1221 1222 ntrip = 0; 1223 for (sp = fmep->suspects; sp; sp = sp->suspects) 1224 if (sp->t == N_UPSET && 1225 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 1226 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 1227 ntrip++; 1228 1229 for (i = 0; i < ntrip; i++) { 1230 struct event *ep, *nep; 1231 struct fme *nfmep; 1232 fmd_case_t *fmcase; 1233 const struct ipath *ipp; 1234 const char *eventstring; 1235 int prev_verbose; 1236 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1237 enum fme_state state; 1238 1239 /* 1240 * First try and evaluate a case with the trip ereport plus 1241 * all the other ereports that cause the trip. If that fails 1242 * to evaluate then try again with just this ereport on its own. 
1243 */ 1244 out(O_ALTFP|O_NONL, "fme_receive_report_serd: "); 1245 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1246 out(O_ALTFP|O_STAMP, NULL); 1247 ep = fmep->e0; 1248 eventstring = ep->enode->u.event.ename->u.name.s; 1249 ipp = ep->ipp; 1250 prune_propagations(eventstring, ipp); 1251 1252 /* 1253 * create a duplicate fme and case 1254 */ 1255 fmcase = fmd_case_open(fmep->hdl, NULL); 1256 out(O_ALTFP|O_NONL, "duplicate fme for event ["); 1257 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1258 out(O_ALTFP, " ]"); 1259 if ((nfmep = newfme(eventstring, ipp, fmep->hdl, 1260 fmcase)) == NULL) { 1261 out(O_ALTFP|O_NONL, "["); 1262 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1263 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1264 publish_undiagnosable(fmep->hdl, ffep, fmcase); 1265 continue; 1266 } 1267 Open_fme_count++; 1268 nfmep->pull = fmep->pull; 1269 init_fme_bufs(nfmep); 1270 out(O_ALTFP|O_NONL, "["); 1271 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1272 out(O_ALTFP, " created FME%d, case %s]", nfmep->id, 1273 fmd_case_uuid(nfmep->hdl, nfmep->fmcase)); 1274 if (ffep) { 1275 fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep); 1276 nfmep->e0r = ffep; 1277 } 1278 1279 /* 1280 * add the original ereports 1281 */ 1282 for (ep = fmep->observations; ep; ep = ep->observations) { 1283 eventstring = ep->enode->u.event.ename->u.name.s; 1284 ipp = ep->ipp; 1285 out(O_ALTFP|O_NONL, "adding event ["); 1286 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1287 out(O_ALTFP, " ]"); 1288 nep = itree_lookup(nfmep->eventtree, eventstring, ipp); 1289 if (nep->count++ == 0) { 1290 nep->observations = nfmep->observations; 1291 nfmep->observations = nep; 1292 serialize_observation(nfmep, eventstring, ipp); 1293 nep->nvp = evnv_dupnvl(ep->nvp); 1294 } 1295 if (ffep) 1296 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, 1297 ffep); 1298 stats_counter_bump(nfmep->Rcount); 1299 } 1300 1301 /* 1302 * add the serd trigger ereport 1303 */ 1304 if ((ep = 
itree_lookup(nfmep->eventtree, tripped[i].ename, 1305 tripped[i].ipp)) == NULL) { 1306 /* 1307 * The trigger ereport is not in the instance tree. It 1308 * was presumably removed by prune_propagations() as 1309 * this combination of events is not present in the 1310 * rules. 1311 */ 1312 out(O_ALTFP, "upsets_eval: e0 not in instance tree"); 1313 Undiag_reason = UD_BADEVENTI; 1314 goto retry_lone_ereport; 1315 } 1316 out(O_ALTFP|O_NONL, "adding event ["); 1317 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1318 out(O_ALTFP, " ]"); 1319 nfmep->ecurrent = ep; 1320 ep->nvp = NULL; 1321 ep->count = 1; 1322 ep->observations = nfmep->observations; 1323 nfmep->observations = ep; 1324 1325 /* 1326 * just peek first. 1327 */ 1328 nfmep->peek = 1; 1329 prev_verbose = Verbose; 1330 if (Debug == 0) 1331 Verbose = 0; 1332 lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep); 1333 state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay); 1334 nfmep->peek = 0; 1335 Verbose = prev_verbose; 1336 if (state == FME_DISPROVED) { 1337 out(O_ALTFP, "upsets_eval: hypothesis disproved"); 1338 Undiag_reason = UD_UNSOLVD; 1339 retry_lone_ereport: 1340 /* 1341 * However the trigger ereport on its own might be 1342 * diagnosable, so check for that. Undo the new fme 1343 * and case we just created and call fme_receive_report. 
1344 */ 1345 out(O_ALTFP|O_NONL, "["); 1346 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, 1347 tripped[i].ipp); 1348 out(O_ALTFP, " retrying with just trigger ereport]"); 1349 itree_free(nfmep->eventtree); 1350 nfmep->eventtree = NULL; 1351 structconfig_free(nfmep->config); 1352 nfmep->config = NULL; 1353 destroy_fme_bufs(nfmep); 1354 fmd_case_close(nfmep->hdl, nfmep->fmcase); 1355 fme_receive_report(fmep->hdl, ffep, 1356 tripped[i].ename, tripped[i].ipp, NULL); 1357 continue; 1358 } 1359 1360 /* 1361 * and evaluate 1362 */ 1363 serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp); 1364 if (ffep) 1365 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep); 1366 stats_counter_bump(nfmep->Rcount); 1367 fme_eval(nfmep, ffep); 1368 } 1369 1370 return (ntrip); 1371 } 1372 1373 /* 1374 * fme_receive_external_report -- call when an external ereport comes in 1375 * 1376 * this routine just converts the relevant information from the ereport 1377 * into a format used internally and passes it on to fme_receive_report(). 1378 */ 1379 void 1380 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 1381 const char *class) 1382 { 1383 struct node *epnamenp; 1384 fmd_case_t *fmcase; 1385 const struct ipath *ipp; 1386 1387 class = stable(class); 1388 1389 /* Get the component path from the ereport */ 1390 epnamenp = platform_getpath(nvl); 1391 1392 /* See if we ended up without a path. */ 1393 if (epnamenp == NULL) { 1394 /* See if class permits silent discard on unknown component. */ 1395 if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) { 1396 out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport " 1397 "to component path, but silent discard allowed.", 1398 class); 1399 } else { 1400 /* 1401 * XFILE: Failure to find a component is bad unless 1402 * 'discard_if_config_unknown=1' was specified in the 1403 * ereport definition. Indicate undiagnosable. 
*/
			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
			    "to component path.", class);
			Undiag_reason = UD_NOPATH;
			fmcase = fmd_case_open(hdl, NULL);
			publish_undiagnosable(hdl, ffep, fmcase);
		}
		return;
	}

	/* epnamenp is only needed to derive the ipath; free it right away */
	ipp = ipath(epnamenp);
	tree_free(epnamenp);
	fme_receive_report(hdl, ffep, class, ipp, nvl);
}

/*
 * fme_receive_repair_list -- handle a list.repaired event
 *
 * Looks up the case uuid and fault list in nvl.  For every fault that
 * platform_fault2ipath() can map to a path, walks Istats and SerdEngines
 * with the reset callbacks for that path and then calls istat_save() and
 * serd_save().  Does nothing but log if the uuid or fault list is missing.
 */
/*ARGSUSED*/
void
fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
    const char *eventstring)
{
	char *uuid;
	nvlist_t **nva;
	uint_t nvc;
	const struct ipath *ipp;

	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &nva, &nvc) != 0) {
		out(O_ALTFP, "No uuid or fault list for list.repaired event");
		return;
	}

	out(O_ALTFP, "Processing list.repaired from case %s", uuid);

	while (nvc-- != 0) {
		/*
		 * Reset any istat or serd engine associated with this path.
		 */
		char *path;

		/* faults that cannot be mapped to a path are skipped */
		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
			continue;

		path = ipath2str(NULL, ipp);
		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
		    path);
		FREE(path);

		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
		istat_save();

		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
		serd_save();
	}
}

/*
 * fme_receive_topology_change -- walk every istat and SERD engine with
 * the topology-change callbacks, then save both sets of state.
 */
/*ARGSUSED*/
void
fme_receive_topology_change(void)
{
	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
	istat_save();

	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
	serd_save();
}

static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep);

/*
 * clear_arrows -- lut_walk() callback over an event tree
 *
 * Resets ep's cached_state and keep_in_tree, and clears the mark on each
 * of ep's B_FROM bubbles and on every arrow leaving those bubbles.  ep2
 * and fmep are not used.
 */
/* ARGSUSED */
static void
clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
{
	struct bubble *bp;
	struct arrowlist *ap;

	ep->cached_state = 0;
	ep->keep_in_tree = 0;
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		bp->mark = 0;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap))
			ap->arrowp->mark = 0;
	}
}

/*
 * fme_receive_report -- route an ereport to the FME(s) that explain it
 *
 * Each open, unsolved, non-overflow FME whose event tree contains the
 * event is given a "peek": the observation is provisionally recorded and
 * hypothesise() is run with fmep->peek set.  If the hypothesis is not
 * disproved the observation is kept, the ereport is added to the case and
 * the FME is re-evaluated; otherwise the provisional bookkeeping is undone.
 *
 * If no existing FME explains the ereport, closed FMEs are destroyed and
 * then one of the following happens:
 *   - an open overflow FME exists: the ereport is added to its case;
 *   - Max_fme is set and reached: an overflow FME is created and its case
 *     is solved with an undiagnosable defect (reason UD_MAXFME);
 *   - otherwise a new case and FME are opened with this event as e0 and
 *     fme_eval() is run on it.
 *
 * ffep may be NULL (upsets_eval() guards for that when calling in); nvl
 * may be NULL as well (upsets_eval() passes NULL for the retry case).
 */
static void
fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
{
	struct event *ep;
	struct fme *fmep = NULL;
	struct fme *ofmep = NULL;
	struct fme *cfmep, *svfmep;
	int matched = 0;
	nvlist_t *defect;
	fmd_case_t *fmcase;

	out(O_ALTFP|O_NONL, "fme_receive_report: ");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP|O_STAMP, NULL);

	/* decide which FME it goes to */
	for (fmep = FMElist; fmep; fmep = fmep->next) {
		int prev_verbose;
		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
		enum fme_state state;
		nvlist_t *pre_peek_nvp = NULL;

		/* remember an overflow FME whose case is still open */
		if (fmep->overflow) {
			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
				ofmep = fmep;

			continue;
		}

		/*
		 * ignore solved or closed cases
		 */
		if (fmep->posted_suspects ||
		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
		    fmd_case_closed(fmep->hdl, fmep->fmcase))
			continue;

		/* look up event in event tree for this FME */
		if ((ep = itree_lookup(fmep->eventtree,
		    eventstring, ipp)) == NULL)
			continue;

		/* note observation */
		fmep->ecurrent = ep;
		if (ep->count++ == 0) {
			/* link it into list of observations seen */
			ep->observations = fmep->observations;
			fmep->observations = ep;
			ep->nvp = evnv_dupnvl(nvl);
		} else {
			/* use new payload values for peek */
			pre_peek_nvp = ep->nvp;
			ep->nvp = evnv_dupnvl(nvl);
		}

		/* tell hypothesise() not to mess with suspect list */
		fmep->peek = 1;

		/* don't want this to be verbose (unless Debug is set) */
		prev_verbose = Verbose;
		if (Debug == 0)
			Verbose = 0;

		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

		fmep->peek = 0;

		/* put verbose flag back */
		Verbose = prev_verbose;

		if (state != FME_DISPROVED) {
			/* found an FME that explains the ereport */
			matched++;
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " explained by FME%d]", fmep->id);

			/* the new payload stays; drop the one it replaced */
			if (pre_peek_nvp)
				nvlist_free(pre_peek_nvp);

			if (ep->count == 1)
				serialize_observation(fmep, eventstring, ipp);

			if (ffep)
				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

			stats_counter_bump(fmep->Rcount);

			/* re-eval FME */
			fme_eval(fmep, ffep);
		} else {

			/* not a match, undo noting of observation */
			fmep->ecurrent = NULL;
			if (--ep->count == 0) {
				/*
				 * unlink it from observations; ep was pushed
				 * on the head of the list above, so popping
				 * the head removes it
				 */
				fmep->observations = ep->observations;
				ep->observations = NULL;
				nvlist_free(ep->nvp);
				ep->nvp = NULL;
			} else {
				/* restore the payload saved before the peek */
				nvlist_free(ep->nvp);
				ep->nvp = pre_peek_nvp;
			}
		}
	}

	if (matched)
		return;	/* explained by at least one existing FME */

	/* clean up closed fmes */
	cfmep = ClosedFMEs;
	while (cfmep != NULL) {
		svfmep = cfmep->next;
		destroy_fme(cfmep);
		cfmep = svfmep;
	}
	ClosedFMEs = NULL;
	prune_propagations(eventstring, ipp);

	if (ofmep) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
		if (ffep)
			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);

		return;

	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " MAX OPEN FME REACHED]");

		fmcase = fmd_case_open(hdl, NULL);

		/* Create overflow fme */
		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
			publish_undiagnosable(hdl, ffep, fmcase);
			return;
		}

		Open_fme_count++;

		init_fme_bufs(fmep);
		fmep->overflow = B_TRUE;

		if (ffep)
			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

		/* solve the overflow case immediately as undiagnosable */
		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
		    NULL, NULL, NULL);
		(void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
		fmd_case_solve(hdl, fmep->fmcase);
		return;
	}

	/* open a case */
	fmcase = fmd_case_open(hdl, NULL);

	/* start a new FME */
	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " CANNOT DIAGNOSE]");
		publish_undiagnosable(hdl, ffep, fmcase);
		return;
	}

	Open_fme_count++;

	init_fme_bufs(fmep);

	out(O_ALTFP|O_NONL, "[");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
	    fmd_case_uuid(hdl, fmep->fmcase));

	ep = fmep->e0;
	ASSERT(ep != NULL);

	/* note observation */
	fmep->ecurrent = ep;
	if (ep->count++ == 0) {
		/* link it into list of observations seen */
		ep->observations = fmep->observations;
		fmep->observations = ep;
		ep->nvp = evnv_dupnvl(nvl);
		serialize_observation(fmep, eventstring, ipp);
	} else {
		/* new payload overrides any previous */
		nvlist_free(ep->nvp);
		ep->nvp = evnv_dupnvl(nvl);
	}

	stats_counter_bump(fmep->Rcount);

	/* this ereport becomes the principal event of the new case */
	if (ffep) {
		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
		fmep->e0r = ffep;
	}

	/* give the diagnosis algorithm a shot at the new FME state */
	fme_eval(fmep, ffep);
}

/*
 * fme_status -- print every FME on FMElist via fme_print(), or a one-line
 * message if there are none.  flags is passed through to out()/fme_print().
 */
void
fme_status(int flags)
{
	struct fme *fmep;

	if (FMElist == NULL) {
		out(flags, "No fault management exercises underway.");
		return;
	}

	for (fmep = FMElist; fmep; fmep = fmep->next)
		fme_print(flags, fmep);
}

/*
 * "indent" routines used mostly for nicely formatted debug output, but also
 * for sanity checking for infinite recursion bugs.
1724 */ 1725 1726 #define MAX_INDENT 1024 1727 static const char *indent_s[MAX_INDENT]; 1728 static int current_indent; 1729 1730 static void 1731 indent_push(const char *s) 1732 { 1733 if (current_indent < MAX_INDENT) 1734 indent_s[current_indent++] = s; 1735 else 1736 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1737 } 1738 1739 static void 1740 indent_set(const char *s) 1741 { 1742 current_indent = 0; 1743 indent_push(s); 1744 } 1745 1746 static void 1747 indent_pop(void) 1748 { 1749 if (current_indent > 0) 1750 current_indent--; 1751 else 1752 out(O_DIE, "recursion underflow"); 1753 } 1754 1755 static void 1756 indent(void) 1757 { 1758 int i; 1759 if (!Verbose) 1760 return; 1761 for (i = 0; i < current_indent; i++) 1762 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1763 } 1764 1765 #define SLNEW 1 1766 #define SLCHANGED 2 1767 #define SLWAIT 3 1768 #define SLDISPROVED 4 1769 1770 static void 1771 print_suspects(int circumstance, struct fme *fmep) 1772 { 1773 struct event *ep; 1774 1775 out(O_ALTFP|O_NONL, "["); 1776 if (circumstance == SLCHANGED) { 1777 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. 
state: %s, " 1778 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1779 } else if (circumstance == SLWAIT) { 1780 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id, 1781 fmep->timer); 1782 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1783 } else if (circumstance == SLDISPROVED) { 1784 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1785 } else { 1786 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1787 } 1788 1789 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1790 out(O_ALTFP, "]"); 1791 return; 1792 } 1793 1794 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1795 out(O_ALTFP|O_NONL, " "); 1796 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1797 } 1798 out(O_ALTFP, "]"); 1799 } 1800 1801 static struct node * 1802 eventprop_lookup(struct event *ep, const char *propname) 1803 { 1804 return (lut_lookup(ep->props, (void *)propname, NULL)); 1805 } 1806 1807 #define MAXDIGITIDX 23 1808 static char numbuf[MAXDIGITIDX + 1]; 1809 1810 static int 1811 node2uint(struct node *n, uint_t *valp) 1812 { 1813 struct evalue value; 1814 struct lut *globals = NULL; 1815 1816 if (n == NULL) 1817 return (1); 1818 1819 /* 1820 * check value.v since we are being asked to convert an unsigned 1821 * long long int to an unsigned int 1822 */ 1823 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1824 value.t != UINT64 || value.v > (1ULL << 32)) 1825 return (1); 1826 1827 *valp = (uint_t)value.v; 1828 1829 return (0); 1830 } 1831 1832 static nvlist_t * 1833 node2fmri(struct node *n) 1834 { 1835 nvlist_t **pa, *f, *p; 1836 struct node *nc; 1837 uint_t depth = 0; 1838 char *numstr, *nullbyte; 1839 char *failure; 1840 int err, i; 1841 1842 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1843 if (n == NULL || n->t != T_NAME) 1844 return (NULL); 1845 1846 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1847 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1848 break; 1849 depth++; 1850 } 1851 1852 if (nc != NULL) { 1853 /* We bailed early, something went wrong */ 1854 return (NULL); 1855 } 1856 1857 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1858 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1859 pa = alloca(depth * sizeof (nvlist_t *)); 1860 for (i = 0; i < depth; i++) 1861 pa[i] = NULL; 1862 1863 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1864 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1865 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1866 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1867 if (err != 0) { 1868 failure = "basic construction of FMRI failed"; 1869 goto boom; 1870 } 1871 1872 numbuf[MAXDIGITIDX] = '\0'; 1873 nullbyte = &numbuf[MAXDIGITIDX]; 1874 i = 0; 1875 1876 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1877 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1878 if (err != 0) { 1879 failure = "alloc of an hc-pair failed"; 1880 goto boom; 1881 } 1882 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1883 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1884 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1885 if (err != 0) { 1886 failure = "construction of an hc-pair failed"; 1887 goto boom; 1888 } 1889 pa[i++] = p; 1890 } 1891 1892 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1893 if (err == 0) { 1894 for (i = 0; i < depth; i++) 1895 if (pa[i] != NULL) 1896 nvlist_free(pa[i]); 1897 return (f); 1898 } 1899 failure = "addition of hc-pair array to FMRI failed"; 1900 1901 boom: 1902 for (i = 0; i < depth; i++) 1903 if (pa[i] != NULL) 1904 nvlist_free(pa[i]); 1905 nvlist_free(f); 1906 out(O_DIE, "%s", failure); 1907 /*NOTREACHED*/ 1908 return (NULL); 1909 } 1910 1911 /* an ipath cache entry is an array 
of these, with s==NULL at the end */ 1912 struct ipath { 1913 const char *s; /* component name (in stable) */ 1914 int i; /* instance number */ 1915 }; 1916 1917 static nvlist_t * 1918 ipath2fmri(struct ipath *ipath) 1919 { 1920 nvlist_t **pa, *f, *p; 1921 uint_t depth = 0; 1922 char *numstr, *nullbyte; 1923 char *failure; 1924 int err, i; 1925 struct ipath *ipp; 1926 1927 for (ipp = ipath; ipp->s != NULL; ipp++) 1928 depth++; 1929 1930 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1931 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1932 pa = alloca(depth * sizeof (nvlist_t *)); 1933 for (i = 0; i < depth; i++) 1934 pa[i] = NULL; 1935 1936 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1937 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1938 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1939 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1940 if (err != 0) { 1941 failure = "basic construction of FMRI failed"; 1942 goto boom; 1943 } 1944 1945 numbuf[MAXDIGITIDX] = '\0'; 1946 nullbyte = &numbuf[MAXDIGITIDX]; 1947 i = 0; 1948 1949 for (ipp = ipath; ipp->s != NULL; ipp++) { 1950 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1951 if (err != 0) { 1952 failure = "alloc of an hc-pair failed"; 1953 goto boom; 1954 } 1955 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s); 1956 numstr = ulltostr(ipp->i, nullbyte); 1957 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1958 if (err != 0) { 1959 failure = "construction of an hc-pair failed"; 1960 goto boom; 1961 } 1962 pa[i++] = p; 1963 } 1964 1965 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1966 if (err == 0) { 1967 for (i = 0; i < depth; i++) 1968 if (pa[i] != NULL) 1969 nvlist_free(pa[i]); 1970 return (f); 1971 } 1972 failure = "addition of hc-pair array to FMRI failed"; 1973 1974 boom: 1975 for (i = 0; i < depth; i++) 1976 if (pa[i] != NULL) 1977 nvlist_free(pa[i]); 1978 nvlist_free(f); 1979 out(O_DIE, "%s", failure); 1980 
/*NOTREACHED*/ 1981 return (NULL); 1982 } 1983 1984 static uint_t 1985 avg(uint_t sum, uint_t cnt) 1986 { 1987 unsigned long long s = sum * 10; 1988 1989 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1990 } 1991 1992 static uint8_t 1993 percentof(uint_t part, uint_t whole) 1994 { 1995 unsigned long long p = part * 1000; 1996 1997 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1998 } 1999 2000 struct rsl { 2001 struct event *suspect; 2002 nvlist_t *asru; 2003 nvlist_t *fru; 2004 nvlist_t *rsrc; 2005 }; 2006 2007 /* 2008 * rslfree -- free internal members of struct rsl not expected to be 2009 * freed elsewhere. 2010 */ 2011 static void 2012 rslfree(struct rsl *freeme) 2013 { 2014 if (freeme->asru != NULL) 2015 nvlist_free(freeme->asru); 2016 if (freeme->fru != NULL) 2017 nvlist_free(freeme->fru); 2018 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 2019 nvlist_free(freeme->rsrc); 2020 } 2021 2022 /* 2023 * rslcmp -- compare two rsl structures. Use the following 2024 * comparisons to establish cardinality: 2025 * 2026 * 1. Name of the suspect's class. (simple strcmp) 2027 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 2028 * 2029 */ 2030 static int 2031 rslcmp(const void *a, const void *b) 2032 { 2033 struct rsl *r1 = (struct rsl *)a; 2034 struct rsl *r2 = (struct rsl *)b; 2035 int rv; 2036 2037 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 2038 r2->suspect->enode->u.event.ename->u.name.s); 2039 if (rv != 0) 2040 return (rv); 2041 2042 if (r1->asru == NULL && r2->asru == NULL) 2043 return (0); 2044 if (r1->asru == NULL) 2045 return (-1); 2046 if (r2->asru == NULL) 2047 return (1); 2048 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 2049 } 2050 2051 /* 2052 * rsluniq -- given an array of rsl structures, seek out and "remove" 2053 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 2054 * of the array element. 
Removal also means updating the number of 2055 * problems and the number of problems which are not faults. User 2056 * provides the first and last element pointers. 2057 */ 2058 static void 2059 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 2060 { 2061 struct rsl *cr; 2062 2063 if (*nprobs == 1) 2064 return; 2065 2066 /* 2067 * At this point, we only expect duplicate defects. 2068 * Eversholt's diagnosis algorithm prevents duplicate 2069 * suspects, but we rewrite defects in the platform code after 2070 * the diagnosis is made, and that can introduce new 2071 * duplicates. 2072 */ 2073 while (first <= last) { 2074 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 2075 first++; 2076 continue; 2077 } 2078 cr = first + 1; 2079 while (cr <= last) { 2080 if (is_defect(first->suspect->t)) { 2081 if (rslcmp(first, cr) == 0) { 2082 cr->suspect = NULL; 2083 rslfree(cr); 2084 (*nprobs)--; 2085 (*nnonf)--; 2086 } 2087 } 2088 /* 2089 * assume all defects are in order after our 2090 * sort and short circuit here with "else break" ? 2091 */ 2092 cr++; 2093 } 2094 first++; 2095 } 2096 } 2097 2098 /* 2099 * get_resources -- for a given suspect, determine what ASRU, FRU and 2100 * RSRC nvlists should be advertised in the final suspect list. 2101 */ 2102 void 2103 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 2104 { 2105 struct node *asrudef, *frudef; 2106 nvlist_t *asru, *fru; 2107 nvlist_t *rsrc = NULL; 2108 char *pathstr; 2109 2110 /* 2111 * First find any ASRU and/or FRU defined in the 2112 * initial fault tree. 
2113 */ 2114 asrudef = eventprop_lookup(sp, L_ASRU); 2115 frudef = eventprop_lookup(sp, L_FRU); 2116 2117 /* 2118 * Create FMRIs based on those definitions 2119 */ 2120 asru = node2fmri(asrudef); 2121 fru = node2fmri(frudef); 2122 pathstr = ipath2str(NULL, sp->ipp); 2123 2124 /* 2125 * Allow for platform translations of the FMRIs 2126 */ 2127 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 2128 pathstr); 2129 2130 FREE(pathstr); 2131 rsrcs->suspect = sp; 2132 rsrcs->asru = asru; 2133 rsrcs->fru = fru; 2134 rsrcs->rsrc = rsrc; 2135 } 2136 2137 /* 2138 * trim_suspects -- prior to publishing, we may need to remove some 2139 * suspects from the list. If we're auto-closing upsets, we don't 2140 * want any of those in the published list. If the ASRUs for multiple 2141 * defects resolve to the same ASRU (driver) we only want to publish 2142 * that as a single suspect. 2143 */ 2144 static void 2145 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 2146 struct rsl **end) 2147 { 2148 struct event *ep; 2149 struct rsl *rp; 2150 int rpcnt; 2151 2152 /* 2153 * First save the suspects in the psuspects, then copy back 2154 * only the ones we wish to retain. This resets nsuspects to 2155 * zero. 2156 */ 2157 rpcnt = fmep->nsuspects; 2158 save_suspects(fmep); 2159 2160 /* 2161 * allocate an array of resource pointers for the suspects. 2162 * We may end up using less than the full allocation, but this 2163 * is a very short-lived array. publish_suspects() will free 2164 * this array when it's done using it. 
2165 */ 2166 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 2167 bzero(rp, rpcnt * sizeof (struct rsl)); 2168 2169 /* first pass, remove any unwanted upsets and populate our array */ 2170 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 2171 if (no_upsets && is_upset(ep->t)) 2172 continue; 2173 get_resources(ep, rp, fmep->config); 2174 rp++; 2175 fmep->nsuspects++; 2176 if (!is_fault(ep->t)) 2177 fmep->nonfault++; 2178 } 2179 2180 /* if all we had was unwanted upsets, we're done */ 2181 if (fmep->nsuspects == 0) 2182 return; 2183 2184 *end = rp - 1; 2185 2186 /* sort the array */ 2187 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 2188 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 2189 } 2190 2191 /* 2192 * addpayloadprop -- add a payload prop to a problem 2193 */ 2194 static void 2195 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 2196 { 2197 ASSERT(fault != NULL); 2198 ASSERT(lhs != NULL); 2199 ASSERT(rhs != NULL); 2200 2201 if (rhs->t == UINT64) { 2202 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 2203 2204 if (nvlist_add_uint64(fault, lhs, rhs->v) != 0) 2205 out(O_DIE, 2206 "cannot add payloadprop \"%s\" to fault", lhs); 2207 } else { 2208 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 2209 lhs, (char *)(uintptr_t)rhs->v); 2210 2211 if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) != 2212 0) 2213 out(O_DIE, 2214 "cannot add payloadprop \"%s\" to fault", lhs); 2215 } 2216 } 2217 2218 static char *Istatbuf; 2219 static char *Istatbufptr; 2220 static int Istatsz; 2221 2222 /* 2223 * istataddsize -- calculate size of istat and add it to Istatsz 2224 */ 2225 /*ARGSUSED2*/ 2226 static void 2227 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2228 { 2229 int val; 2230 2231 ASSERT(lhs != NULL); 2232 ASSERT(rhs != NULL); 2233 2234 if ((val = stats_counter_value(rhs)) == 0) 2235 return; /* skip zero-valued stats */ 2236 2237 /* count up the size of the stat 
name */ 2238 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 2239 Istatsz++; /* for the trailing NULL byte */ 2240 2241 /* count up the size of the stat value */ 2242 Istatsz += snprintf(NULL, 0, "%d", val); 2243 Istatsz++; /* for the trailing NULL byte */ 2244 } 2245 2246 /* 2247 * istat2str -- serialize an istat, writing result to *Istatbufptr 2248 */ 2249 /*ARGSUSED2*/ 2250 static void 2251 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2252 { 2253 char *str; 2254 int len; 2255 int val; 2256 2257 ASSERT(lhs != NULL); 2258 ASSERT(rhs != NULL); 2259 2260 if ((val = stats_counter_value(rhs)) == 0) 2261 return; /* skip zero-valued stats */ 2262 2263 /* serialize the stat name */ 2264 str = ipath2str(lhs->ename, lhs->ipath); 2265 len = strlen(str); 2266 2267 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 2268 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 2269 Istatbufptr += len; 2270 FREE(str); 2271 *Istatbufptr++ = '\0'; 2272 2273 /* serialize the stat value */ 2274 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 2275 "%d", val); 2276 *Istatbufptr++ = '\0'; 2277 2278 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]); 2279 } 2280 2281 void 2282 istat_save() 2283 { 2284 if (Istat_need_save == 0) 2285 return; 2286 2287 /* figure out how big the serialzed info is */ 2288 Istatsz = 0; 2289 lut_walk(Istats, (lut_cb)istataddsize, NULL); 2290 2291 if (Istatsz == 0) { 2292 /* no stats to save */ 2293 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2294 return; 2295 } 2296 2297 /* create the serialized buffer */ 2298 Istatbufptr = Istatbuf = MALLOC(Istatsz); 2299 lut_walk(Istats, (lut_cb)istat2str, NULL); 2300 2301 /* clear out current saved stats */ 2302 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2303 2304 /* write out the new version */ 2305 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 2306 FREE(Istatbuf); 2307 2308 Istat_need_save = 0; 2309 } 2310 2311 int 2312 istat_cmp(struct istat_entry *ent1, 
struct istat_entry *ent2) 2313 { 2314 if (ent1->ename != ent2->ename) 2315 return (ent2->ename - ent1->ename); 2316 if (ent1->ipath != ent2->ipath) 2317 return ((char *)ent2->ipath - (char *)ent1->ipath); 2318 2319 return (0); 2320 } 2321 2322 /* 2323 * istat-verify -- verify the component associated with a stat still exists 2324 * 2325 * if the component no longer exists, this routine resets the stat and 2326 * returns 0. if the component still exists, it returns 1. 2327 */ 2328 static int 2329 istat_verify(struct node *snp, struct istat_entry *entp) 2330 { 2331 struct stats *statp; 2332 nvlist_t *fmri; 2333 2334 fmri = node2fmri(snp->u.event.epname); 2335 if (platform_path_exists(fmri)) { 2336 nvlist_free(fmri); 2337 return (1); 2338 } 2339 nvlist_free(fmri); 2340 2341 /* component no longer in system. zero out the associated stats */ 2342 if ((statp = (struct stats *) 2343 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL || 2344 stats_counter_value(statp) == 0) 2345 return (0); /* stat is already reset */ 2346 2347 Istat_need_save = 1; 2348 stats_counter_reset(statp); 2349 return (0); 2350 } 2351 2352 static void 2353 istat_bump(struct node *snp, int n) 2354 { 2355 struct stats *statp; 2356 struct istat_entry ent; 2357 2358 ASSERT(snp != NULL); 2359 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t)); 2360 ASSERT(snp->u.event.epname != NULL); 2361 2362 /* class name should be hoisted into a single stable entry */ 2363 ASSERT(snp->u.event.ename->u.name.next == NULL); 2364 ent.ename = snp->u.event.ename->u.name.s; 2365 ent.ipath = ipath(snp->u.event.epname); 2366 2367 if (!istat_verify(snp, &ent)) { 2368 /* component no longer exists in system, nothing to do */ 2369 return; 2370 } 2371 2372 if ((statp = (struct stats *) 2373 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) { 2374 /* need to create the counter */ 2375 int cnt = 0; 2376 struct node *np; 2377 char *sname; 2378 char *snamep; 2379 struct istat_entry *newentp; 2380 2381 /* count up the 
size of the stat name */ 2382 np = snp->u.event.ename; 2383 while (np != NULL) { 2384 cnt += strlen(np->u.name.s); 2385 cnt++; /* for the '.' or '@' */ 2386 np = np->u.name.next; 2387 } 2388 np = snp->u.event.epname; 2389 while (np != NULL) { 2390 cnt += snprintf(NULL, 0, "%s%llu", 2391 np->u.name.s, np->u.name.child->u.ull); 2392 cnt++; /* for the '/' or trailing NULL byte */ 2393 np = np->u.name.next; 2394 } 2395 2396 /* build the stat name */ 2397 snamep = sname = alloca(cnt); 2398 np = snp->u.event.ename; 2399 while (np != NULL) { 2400 snamep += snprintf(snamep, &sname[cnt] - snamep, 2401 "%s", np->u.name.s); 2402 np = np->u.name.next; 2403 if (np) 2404 *snamep++ = '.'; 2405 } 2406 *snamep++ = '@'; 2407 np = snp->u.event.epname; 2408 while (np != NULL) { 2409 snamep += snprintf(snamep, &sname[cnt] - snamep, 2410 "%s%llu", np->u.name.s, np->u.name.child->u.ull); 2411 np = np->u.name.next; 2412 if (np) 2413 *snamep++ = '/'; 2414 } 2415 *snamep++ = '\0'; 2416 2417 /* create the new stat & add it to our list */ 2418 newentp = MALLOC(sizeof (*newentp)); 2419 *newentp = ent; 2420 statp = stats_new_counter(NULL, sname, 0); 2421 Istats = lut_add(Istats, (void *)newentp, (void *)statp, 2422 (lut_cmp)istat_cmp); 2423 } 2424 2425 /* if n is non-zero, set that value instead of bumping */ 2426 if (n) { 2427 stats_counter_reset(statp); 2428 stats_counter_add(statp, n); 2429 } else 2430 stats_counter_bump(statp); 2431 Istat_need_save = 1; 2432 2433 ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath); 2434 out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented", 2435 stats_counter_value(statp)); 2436 } 2437 2438 /*ARGSUSED*/ 2439 static void 2440 istat_destructor(void *left, void *right, void *arg) 2441 { 2442 struct istat_entry *entp = (struct istat_entry *)left; 2443 struct stats *statp = (struct stats *)right; 2444 FREE(entp); 2445 stats_delete(statp); 2446 } 2447 2448 /* 2449 * Callback used in a walk of the Istats to reset matching stat counters. 
2450 */ 2451 static void 2452 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp, 2453 const struct ipath *ipp) 2454 { 2455 char *path; 2456 2457 if (entp->ipath == ipp) { 2458 path = ipath2str(entp->ename, ipp); 2459 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path); 2460 FREE(path); 2461 stats_counter_reset(statp); 2462 Istat_need_save = 1; 2463 } 2464 } 2465 2466 /*ARGSUSED*/ 2467 static void 2468 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp, 2469 void *unused) 2470 { 2471 char *path; 2472 nvlist_t *fmri; 2473 2474 fmri = ipath2fmri((struct ipath *)(entp->ipath)); 2475 if (!platform_path_exists(fmri)) { 2476 path = ipath2str(entp->ename, entp->ipath); 2477 out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path); 2478 FREE(path); 2479 stats_counter_reset(statp); 2480 Istat_need_save = 1; 2481 } 2482 nvlist_free(fmri); 2483 } 2484 2485 void 2486 istat_fini(void) 2487 { 2488 lut_free(Istats, istat_destructor, NULL); 2489 } 2490 2491 static char *Serdbuf; 2492 static char *Serdbufptr; 2493 static int Serdsz; 2494 2495 /* 2496 * serdaddsize -- calculate size of serd and add it to Serdsz 2497 */ 2498 /*ARGSUSED*/ 2499 static void 2500 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg) 2501 { 2502 ASSERT(lhs != NULL); 2503 2504 /* count up the size of the stat name */ 2505 Serdsz += ipath2strlen(lhs->ename, lhs->ipath); 2506 Serdsz++; /* for the trailing NULL byte */ 2507 } 2508 2509 /* 2510 * serd2str -- serialize a serd engine, writing result to *Serdbufptr 2511 */ 2512 /*ARGSUSED*/ 2513 static void 2514 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg) 2515 { 2516 char *str; 2517 int len; 2518 2519 ASSERT(lhs != NULL); 2520 2521 /* serialize the serd engine name */ 2522 str = ipath2str(lhs->ename, lhs->ipath); 2523 len = strlen(str); 2524 2525 ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]); 2526 (void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr); 2527 
Serdbufptr += len; 2528 FREE(str); 2529 *Serdbufptr++ = '\0'; 2530 ASSERT(Serdbufptr <= &Serdbuf[Serdsz]); 2531 } 2532 2533 void 2534 serd_save() 2535 { 2536 if (Serd_need_save == 0) 2537 return; 2538 2539 /* figure out how big the serialzed info is */ 2540 Serdsz = 0; 2541 lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL); 2542 2543 if (Serdsz == 0) { 2544 /* no serd engines to save */ 2545 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS); 2546 return; 2547 } 2548 2549 /* create the serialized buffer */ 2550 Serdbufptr = Serdbuf = MALLOC(Serdsz); 2551 lut_walk(SerdEngines, (lut_cb)serd2str, NULL); 2552 2553 /* clear out current saved stats */ 2554 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS); 2555 2556 /* write out the new version */ 2557 fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz); 2558 FREE(Serdbuf); 2559 Serd_need_save = 0; 2560 } 2561 2562 int 2563 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2) 2564 { 2565 if (ent1->ename != ent2->ename) 2566 return (ent2->ename - ent1->ename); 2567 if (ent1->ipath != ent2->ipath) 2568 return ((char *)ent2->ipath - (char *)ent1->ipath); 2569 2570 return (0); 2571 } 2572 2573 void 2574 fme_serd_load(fmd_hdl_t *hdl) 2575 { 2576 int sz; 2577 char *sbuf; 2578 char *sepptr; 2579 char *ptr; 2580 struct serd_entry *newentp; 2581 struct node *epname; 2582 nvlist_t *fmri; 2583 char *namestring; 2584 2585 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0) 2586 return; 2587 sbuf = alloca(sz); 2588 fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz); 2589 ptr = sbuf; 2590 while (ptr < &sbuf[sz]) { 2591 sepptr = strchr(ptr, '@'); 2592 *sepptr = '\0'; 2593 namestring = ptr; 2594 sepptr++; 2595 ptr = sepptr; 2596 ptr += strlen(ptr); 2597 ptr++; /* move past the '\0' separating paths */ 2598 epname = pathstring2epnamenp(sepptr); 2599 fmri = node2fmri(epname); 2600 if (platform_path_exists(fmri)) { 2601 newentp = MALLOC(sizeof (*newentp)); 2602 newentp->hdl = hdl; 2603 newentp->ipath = ipath(epname); 2604 newentp->ename = 
stable(namestring); 2605 SerdEngines = lut_add(SerdEngines, (void *)newentp, 2606 (void *)newentp, (lut_cmp)serd_cmp); 2607 } else 2608 Serd_need_save = 1; 2609 tree_free(epname); 2610 nvlist_free(fmri); 2611 } 2612 /* save it back again in case some of the paths no longer exist */ 2613 serd_save(); 2614 } 2615 2616 /*ARGSUSED*/ 2617 static void 2618 serd_destructor(void *left, void *right, void *arg) 2619 { 2620 struct serd_entry *entp = (struct serd_entry *)left; 2621 FREE(entp); 2622 } 2623 2624 /* 2625 * Callback used in a walk of the SerdEngines to reset matching serd engines. 2626 */ 2627 /*ARGSUSED*/ 2628 static void 2629 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp) 2630 { 2631 char *path; 2632 2633 if (entp->ipath == ipp) { 2634 path = ipath2str(entp->ename, ipp); 2635 out(O_ALTFP, "serd_reset_cb: resetting %s", path); 2636 fmd_serd_reset(entp->hdl, path); 2637 FREE(path); 2638 Serd_need_save = 1; 2639 } 2640 } 2641 2642 /*ARGSUSED*/ 2643 static void 2644 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2) 2645 { 2646 char *path; 2647 nvlist_t *fmri; 2648 2649 fmri = ipath2fmri((struct ipath *)(entp->ipath)); 2650 if (!platform_path_exists(fmri)) { 2651 path = ipath2str(entp->ename, entp->ipath); 2652 out(O_ALTFP, "serd_topo_chg_cb: not present %s", path); 2653 fmd_serd_reset(entp->hdl, path); 2654 FREE(path); 2655 Serd_need_save = 1; 2656 } 2657 nvlist_free(fmri); 2658 } 2659 2660 void 2661 serd_fini(void) 2662 { 2663 lut_free(SerdEngines, serd_destructor, NULL); 2664 } 2665 2666 static void 2667 publish_suspects(struct fme *fmep) 2668 { 2669 struct rsl *srl = NULL; 2670 struct rsl *erl; 2671 struct rsl *rp; 2672 nvlist_t *fault; 2673 uint8_t cert; 2674 uint_t *frs; 2675 uint_t fravg, frsum, fr; 2676 uint_t messval; 2677 struct node *snp; 2678 int frcnt, fridx; 2679 boolean_t no_upsets = B_FALSE; 2680 boolean_t allfaulty = B_TRUE; 2681 2682 stats_counter_bump(fmep->diags); 2683 2684 /* 2685 * If we're 
auto-closing upsets, we don't want to include them 2686 * in any produced suspect lists or certainty accounting. 2687 */ 2688 if (Autoclose != NULL) 2689 if (strcmp(Autoclose, "true") == 0 || 2690 strcmp(Autoclose, "all") == 0 || 2691 strcmp(Autoclose, "upsets") == 0) 2692 no_upsets = B_TRUE; 2693 2694 trim_suspects(fmep, no_upsets, &srl, &erl); 2695 2696 /* 2697 * If the resulting suspect list has no members, we're 2698 * done. Returning here will simply close the case. 2699 */ 2700 if (fmep->nsuspects == 0) { 2701 out(O_ALTFP, 2702 "[FME%d, case %s (all suspects are upsets)]", 2703 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2704 FREE(srl); 2705 restore_suspects(fmep); 2706 return; 2707 } 2708 2709 /* 2710 * If the suspect list is all faults, then for a given fault, 2711 * say X of N, X's certainty is computed via: 2712 * 2713 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100 2714 * 2715 * If none of the suspects are faults, and there are N suspects, 2716 * the certainty of a given suspect is 100/N. 2717 * 2718 * If there are are a mixture of faults and other problems in 2719 * the suspect list, we take an average of the faults' 2720 * FITrates and treat this average as the FITrate for any 2721 * non-faults. The fitrate of any given suspect is then 2722 * computed per the first formula above. 
2723 */ 2724 if (fmep->nonfault == fmep->nsuspects) { 2725 /* NO faults in the suspect list */ 2726 cert = percentof(1, fmep->nsuspects); 2727 } else { 2728 /* sum the fitrates */ 2729 frs = alloca(fmep->nsuspects * sizeof (uint_t)); 2730 fridx = frcnt = frsum = 0; 2731 2732 for (rp = srl; rp <= erl; rp++) { 2733 struct node *n; 2734 2735 if (rp->suspect == NULL) 2736 continue; 2737 if (!is_fault(rp->suspect->t)) { 2738 frs[fridx++] = 0; 2739 continue; 2740 } 2741 n = eventprop_lookup(rp->suspect, L_FITrate); 2742 if (node2uint(n, &fr) != 0) { 2743 out(O_DEBUG|O_NONL, "event "); 2744 ipath_print(O_DEBUG|O_NONL, 2745 rp->suspect->enode->u.event.ename->u.name.s, 2746 rp->suspect->ipp); 2747 out(O_DEBUG, " has no FITrate (using 1)"); 2748 fr = 1; 2749 } else if (fr == 0) { 2750 out(O_DEBUG|O_NONL, "event "); 2751 ipath_print(O_DEBUG|O_NONL, 2752 rp->suspect->enode->u.event.ename->u.name.s, 2753 rp->suspect->ipp); 2754 out(O_DEBUG, " has zero FITrate (using 1)"); 2755 fr = 1; 2756 } 2757 2758 frs[fridx++] = fr; 2759 frsum += fr; 2760 frcnt++; 2761 } 2762 fravg = avg(frsum, frcnt); 2763 for (fridx = 0; fridx < fmep->nsuspects; fridx++) 2764 if (frs[fridx] == 0) { 2765 frs[fridx] = fravg; 2766 frsum += fravg; 2767 } 2768 } 2769 2770 /* Add them in reverse order of our sort, as fmd reverses order */ 2771 for (rp = erl; rp >= srl; rp--) { 2772 if (rp->suspect == NULL) 2773 continue; 2774 if (!is_fault(rp->suspect->t)) 2775 allfaulty = B_FALSE; 2776 if (fmep->nonfault != fmep->nsuspects) 2777 cert = percentof(frs[--fridx], frsum); 2778 fault = fmd_nvl_create_fault(fmep->hdl, 2779 rp->suspect->enode->u.event.ename->u.name.s, 2780 cert, 2781 rp->asru, 2782 rp->fru, 2783 rp->rsrc); 2784 if (fault == NULL) 2785 out(O_DIE, "fault creation failed"); 2786 /* if "message" property exists, add it to the fault */ 2787 if (node2uint(eventprop_lookup(rp->suspect, L_message), 2788 &messval) == 0) { 2789 2790 out(O_ALTFP, 2791 "[FME%d, %s adds message=%d to suspect list]", 2792 fmep->id, 
2793 rp->suspect->enode->u.event.ename->u.name.s, 2794 messval); 2795 if (nvlist_add_boolean_value(fault, 2796 FM_SUSPECT_MESSAGE, 2797 (messval) ? B_TRUE : B_FALSE) != 0) { 2798 out(O_DIE, "cannot add no-message to fault"); 2799 } 2800 } 2801 /* add any payload properties */ 2802 lut_walk(rp->suspect->payloadprops, 2803 (lut_cb)addpayloadprop, (void *)fault); 2804 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault); 2805 rslfree(rp); 2806 2807 /* 2808 * If "action" property exists, evaluate it; this must be done 2809 * before the dupclose check below since some actions may 2810 * modify the asru to be used in fmd_nvl_fmri_faulty. This 2811 * needs to be restructured if any new actions are introduced 2812 * that have effects that we do not want to be visible if 2813 * we decide not to publish in the dupclose check below. 2814 */ 2815 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) { 2816 struct evalue evalue; 2817 2818 out(O_ALTFP|O_NONL, 2819 "[FME%d, %s action ", fmep->id, 2820 rp->suspect->enode->u.event.ename->u.name.s); 2821 ptree_name_iter(O_ALTFP|O_NONL, snp); 2822 out(O_ALTFP, "]"); 2823 Action_nvl = fault; 2824 (void) eval_expr(snp, NULL, NULL, NULL, NULL, 2825 NULL, 0, &evalue); 2826 } 2827 2828 /* 2829 * check if the asru is already marked as "faulty". 2830 */ 2831 if (allfaulty) { 2832 nvlist_t *asru; 2833 2834 out(O_ALTFP|O_VERB, "FMD%d dup check ", fmep->id); 2835 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect); 2836 out(O_ALTFP|O_VERB|O_NONL, " "); 2837 if (nvlist_lookup_nvlist(fault, 2838 FM_FAULT_ASRU, &asru) != 0) { 2839 out(O_ALTFP|O_VERB, "NULL asru"); 2840 allfaulty = B_FALSE; 2841 } else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) { 2842 out(O_ALTFP|O_VERB, "faulty"); 2843 } else { 2844 out(O_ALTFP|O_VERB, "not faulty"); 2845 allfaulty = B_FALSE; 2846 } 2847 } 2848 2849 } 2850 2851 /* 2852 * We are going to publish so take any pre-publication actions. 
2853 */ 2854 if (!allfaulty) { 2855 /* 2856 * don't update the count stat if all asrus are already 2857 * present and unrepaired in the asru cache 2858 */ 2859 for (rp = erl; rp >= srl; rp--) { 2860 struct event *suspect = rp->suspect; 2861 2862 if (suspect == NULL) 2863 continue; 2864 2865 /* if "count" exists, increment the appropriate stat */ 2866 if ((snp = eventprop_lookup(suspect, 2867 L_count)) != NULL) { 2868 out(O_ALTFP|O_NONL, 2869 "[FME%d, %s count ", fmep->id, 2870 suspect->enode->u.event.ename->u.name.s); 2871 ptree_name_iter(O_ALTFP|O_NONL, snp); 2872 out(O_ALTFP, "]"); 2873 istat_bump(snp, 0); 2874 2875 } 2876 } 2877 istat_save(); /* write out any istat changes */ 2878 } 2879 2880 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 2881 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2882 fmd_case_solve(fmep->hdl, fmep->fmcase); 2883 2884 /* 2885 * revert to the original suspect list 2886 */ 2887 FREE(srl); 2888 restore_suspects(fmep); 2889 } 2890 2891 static void 2892 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase) 2893 { 2894 struct case_list *newcase; 2895 nvlist_t *defect; 2896 2897 out(O_ALTFP, 2898 "[undiagnosable ereport received, " 2899 "creating and closing a new case (%s)]", 2900 Undiag_reason ? 
Undiag_reason : "reason not provided"); 2901 2902 newcase = MALLOC(sizeof (struct case_list)); 2903 newcase->next = NULL; 2904 newcase->fmcase = fmcase; 2905 if (Undiagablecaselist != NULL) 2906 newcase->next = Undiagablecaselist; 2907 Undiagablecaselist = newcase; 2908 2909 if (ffep != NULL) 2910 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 2911 2912 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 2913 NULL, NULL, NULL); 2914 if (Undiag_reason != NULL) 2915 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2916 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 2917 2918 fmd_case_solve(hdl, newcase->fmcase); 2919 fmd_case_close(hdl, newcase->fmcase); 2920 } 2921 2922 static void 2923 fme_undiagnosable(struct fme *f) 2924 { 2925 nvlist_t *defect; 2926 2927 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 2928 f->id, fmd_case_uuid(f->hdl, f->fmcase), 2929 Undiag_reason ? Undiag_reason : "undiagnosable"); 2930 2931 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 2932 NULL, NULL, NULL); 2933 if (Undiag_reason != NULL) 2934 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2935 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2936 fmd_case_solve(f->hdl, f->fmcase); 2937 fmd_case_close(f->hdl, f->fmcase); 2938 } 2939 2940 /* 2941 * fme_close_case 2942 * 2943 * Find the requested case amongst our fmes and close it. Free up 2944 * the related fme. 
2945 */ 2946 void 2947 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2948 { 2949 struct case_list *ucasep, *prevcasep = NULL; 2950 struct fme *prev = NULL; 2951 struct fme *fmep; 2952 2953 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2954 if (fmcase != ucasep->fmcase) { 2955 prevcasep = ucasep; 2956 continue; 2957 } 2958 2959 if (prevcasep == NULL) 2960 Undiagablecaselist = Undiagablecaselist->next; 2961 else 2962 prevcasep->next = ucasep->next; 2963 2964 FREE(ucasep); 2965 return; 2966 } 2967 2968 for (fmep = FMElist; fmep; fmep = fmep->next) { 2969 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2970 break; 2971 prev = fmep; 2972 } 2973 2974 if (fmep == NULL) { 2975 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2976 fmd_case_uuid(hdl, fmcase)); 2977 return; 2978 } 2979 2980 if (EFMElist == fmep) 2981 EFMElist = prev; 2982 2983 if (prev == NULL) 2984 FMElist = FMElist->next; 2985 else 2986 prev->next = fmep->next; 2987 2988 fmep->next = NULL; 2989 2990 /* Get rid of any timer this fme has set */ 2991 if (fmep->wull != 0) 2992 fmd_timer_remove(fmep->hdl, fmep->timer); 2993 2994 if (ClosedFMEs == NULL) { 2995 ClosedFMEs = fmep; 2996 } else { 2997 fmep->next = ClosedFMEs; 2998 ClosedFMEs = fmep; 2999 } 3000 3001 Open_fme_count--; 3002 3003 /* See if we can close the overflow FME */ 3004 if (Open_fme_count <= Max_fme) { 3005 for (fmep = FMElist; fmep; fmep = fmep->next) { 3006 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 3007 fmep->fmcase))) 3008 break; 3009 } 3010 3011 if (fmep != NULL) 3012 fmd_case_close(fmep->hdl, fmep->fmcase); 3013 } 3014 } 3015 3016 /* 3017 * fme_set_timer() 3018 * If the time we need to wait for the given FME is less than the 3019 * current timer, kick that old timer out and establish a new one. 
3020 */ 3021 static int 3022 fme_set_timer(struct fme *fmep, unsigned long long wull) 3023 { 3024 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 3025 ptree_timeval(O_ALTFP|O_VERB, &wull); 3026 3027 if (wull <= fmep->pull) { 3028 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 3029 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 3030 out(O_ALTFP|O_VERB, NULL); 3031 /* we've waited at least wull already, don't need timer */ 3032 return (0); 3033 } 3034 3035 out(O_ALTFP|O_VERB|O_NONL, " currently "); 3036 if (fmep->wull != 0) { 3037 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 3038 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 3039 out(O_ALTFP|O_VERB, NULL); 3040 } else { 3041 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 3042 out(O_ALTFP|O_VERB, NULL); 3043 } 3044 3045 if (fmep->wull != 0) 3046 if (wull >= fmep->wull) 3047 /* New timer would fire later than established timer */ 3048 return (0); 3049 3050 if (fmep->wull != 0) { 3051 fmd_timer_remove(fmep->hdl, fmep->timer); 3052 } 3053 3054 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 3055 fmep->e0r, wull); 3056 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 3057 fmep->wull = wull; 3058 return (1); 3059 } 3060 3061 void 3062 fme_timer_fired(struct fme *fmep, id_t tid) 3063 { 3064 struct fme *ffmep = NULL; 3065 3066 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 3067 if (ffmep == fmep) 3068 break; 3069 3070 if (ffmep == NULL) { 3071 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 3072 (void *)fmep); 3073 return; 3074 } 3075 3076 out(O_ALTFP|O_VERB, "Timer fired %lx", tid); 3077 fmep->pull = fmep->wull; 3078 fmep->wull = 0; 3079 fmd_buf_write(fmep->hdl, fmep->fmcase, 3080 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 3081 3082 fme_eval(fmep, fmep->e0r); 3083 } 3084 3085 /* 3086 * Preserve the fme's suspect list in its psuspects list, NULLing the 3087 * suspects list in the meantime. 
3088 */ 3089 static void 3090 save_suspects(struct fme *fmep) 3091 { 3092 struct event *ep; 3093 struct event *nextep; 3094 3095 /* zero out the previous suspect list */ 3096 for (ep = fmep->psuspects; ep; ep = nextep) { 3097 nextep = ep->psuspects; 3098 ep->psuspects = NULL; 3099 } 3100 fmep->psuspects = NULL; 3101 3102 /* zero out the suspect list, copying it to previous suspect list */ 3103 fmep->psuspects = fmep->suspects; 3104 for (ep = fmep->suspects; ep; ep = nextep) { 3105 nextep = ep->suspects; 3106 ep->psuspects = ep->suspects; 3107 ep->suspects = NULL; 3108 ep->is_suspect = 0; 3109 } 3110 fmep->suspects = NULL; 3111 fmep->nsuspects = 0; 3112 fmep->nonfault = 0; 3113 } 3114 3115 /* 3116 * Retrieve the fme's suspect list from its psuspects list. 3117 */ 3118 static void 3119 restore_suspects(struct fme *fmep) 3120 { 3121 struct event *ep; 3122 struct event *nextep; 3123 3124 fmep->nsuspects = fmep->nonfault = 0; 3125 fmep->suspects = fmep->psuspects; 3126 for (ep = fmep->psuspects; ep; ep = nextep) { 3127 fmep->nsuspects++; 3128 if (!is_fault(ep->t)) 3129 fmep->nonfault++; 3130 nextep = ep->psuspects; 3131 ep->suspects = ep->psuspects; 3132 } 3133 } 3134 3135 /* 3136 * this is what we use to call the Emrys prototype code instead of main() 3137 */ 3138 static void 3139 fme_eval(struct fme *fmep, fmd_event_t *ffep) 3140 { 3141 struct event *ep; 3142 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 3143 3144 save_suspects(fmep); 3145 3146 out(O_ALTFP, "Evaluate FME %d", fmep->id); 3147 indent_set(" "); 3148 3149 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 3150 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 3151 3152 out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 3153 fme_state2str(fmep->state)); 3154 for (ep = fmep->suspects; ep; ep = ep->suspects) { 3155 out(O_ALTFP|O_NONL, " "); 3156 itree_pevent_brief(O_ALTFP|O_NONL, ep); 3157 } 3158 out(O_ALTFP, NULL); 3159 3160 switch (fmep->state) { 3161 case 
FME_CREDIBLE: 3162 print_suspects(SLNEW, fmep); 3163 (void) upsets_eval(fmep, ffep); 3164 3165 /* 3166 * we may have already posted suspects in upsets_eval() which 3167 * can recurse into fme_eval() again. If so then just return. 3168 */ 3169 if (fmep->posted_suspects) 3170 return; 3171 3172 publish_suspects(fmep); 3173 fmep->posted_suspects = 1; 3174 fmd_buf_write(fmep->hdl, fmep->fmcase, 3175 WOBUF_POSTD, 3176 (void *)&fmep->posted_suspects, 3177 sizeof (fmep->posted_suspects)); 3178 3179 /* 3180 * Now the suspects have been posted, we can clear up 3181 * the instance tree as we won't be looking at it again. 3182 * Also cancel the timer as the case is now solved. 3183 */ 3184 if (fmep->wull != 0) { 3185 fmd_timer_remove(fmep->hdl, fmep->timer); 3186 fmep->wull = 0; 3187 } 3188 break; 3189 3190 case FME_WAIT: 3191 ASSERT(my_delay > fmep->ull); 3192 (void) fme_set_timer(fmep, my_delay); 3193 print_suspects(SLWAIT, fmep); 3194 itree_prune(fmep->eventtree); 3195 return; 3196 3197 case FME_DISPROVED: 3198 print_suspects(SLDISPROVED, fmep); 3199 Undiag_reason = UD_UNSOLVD; 3200 fme_undiagnosable(fmep); 3201 break; 3202 } 3203 3204 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 3205 int doclose = 0; 3206 3207 if (strcmp(Autoclose, "true") == 0 || 3208 strcmp(Autoclose, "all") == 0) 3209 doclose = 1; 3210 3211 if (strcmp(Autoclose, "upsets") == 0) { 3212 doclose = 1; 3213 for (ep = fmep->suspects; ep; ep = ep->suspects) { 3214 if (ep->t != N_UPSET) { 3215 doclose = 0; 3216 break; 3217 } 3218 } 3219 } 3220 3221 if (doclose) { 3222 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 3223 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 3224 fmd_case_close(fmep->hdl, fmep->fmcase); 3225 } 3226 } 3227 itree_free(fmep->eventtree); 3228 fmep->eventtree = NULL; 3229 structconfig_free(fmep->config); 3230 fmep->config = NULL; 3231 destroy_fme_bufs(fmep); 3232 } 3233 3234 static void indent(void); 3235 static int triggered(struct fme *fmep, struct event *ep, int mark); 
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);

/*
 * checkconstraints -- evaluate the constraints attached to an arrow
 *
 * Returns 0 if some constraint is known to be false and 1 otherwise,
 * i.e. when every constraint is true or when a constraint could not be
 * evaluated yet (it is deferred and the arrow is treated as usable).
 * A definite answer is cached in arrowp->forever_false/forever_true so
 * later calls only print the constraints and return the cached result.
 */
static int
checkconstraints(struct fme *fmep, struct arrow *arrowp)
{
	struct constraintlist *ctp;
	struct evalue value;
	char *sep = "";

	if (arrowp->forever_false) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (0);
	}
	if (arrowp->forever_true) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " Forever true constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (1);
	}

	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		if (eval_expr(ctp->cnode, NULL, NULL,
		    &fmep->globals, fmep->config,
		    arrowp, 0, &value)) {
			/* evaluation successful */
			if (value.t == UNDEFINED || value.v == 0) {
				/* known false */
				arrowp->forever_false = 1;
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " False constraint: ");
				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
				out(O_ALTFP|O_VERB, NULL);
				return (0);
			}
		} else {
			/* evaluation unsuccessful -- unknown value */
			indent();
			out(O_ALTFP|O_VERB|O_NONL,
			    " Deferred constraint: ");
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			out(O_ALTFP|O_VERB, NULL);
			/* not cached: the value may become known later */
			return (1);
		}
	}
	/* known true */
	arrowp->forever_true = 1;
	indent();
	out(O_ALTFP|O_VERB|O_NONL, " True constraint: ");
	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		out(O_ALTFP|O_VERB|O_NONL, sep);
		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
		sep = ", ";
	}
	out(O_ALTFP|O_VERB, NULL);
	return (1);
}

/*
 * triggered -- has an event received enough marked arrows?
 *
 * Walks the arrows of ep's B_TO bubbles and counts those whose mark has
 * any of the bits in "mark" set.  Returns 1 as soon as the count reaches
 * the nork value of the bubble being examined, 0 otherwise.
 * NOTE(review): count is not reset between bubbles, so marks seen in
 * earlier B_TO bubbles carry over -- confirm this is intended.
 */
static int
triggered(struct fme *fmep, struct event *ep, int mark)
{
	struct bubble *bp;
	struct arrowlist *ap;
	int count = 0;

	stats_counter_bump(fmep->Tcallcount);
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			/* check count of marks against K in the bubble */
			if ((ap->arrowp->mark & mark) &&
			    ++count >= bp->nork)
				return (1);
		}
	}
	return (0);
}

/*
 * mark_arrows -- propagate (or clear) effect marks from an event
 *
 * Follows every arrow leaving ep's B_FROM bubbles, recursing through the
 * events at the arrow heads.
 *
 * mark == 0 clears: marked arrows are unmarked, the effect bits are
 * removed from the head event's cached_state and, if keep is set and any
 * of those bits were set, the head event is flagged keep_in_tree.  pdelay
 * is not used in this mode (callers pass NULL).
 *
 * mark != 0 (CREDIBLE_EFFECT or PARENT_WAIT) marks: an arrow is followed
 * only if its head event has not already been decided, its constraints
 * hold and the head's K-count is met; the head's requirements are then
 * tested and the result recorded in its cached_state.
 *
 * Returns WAIT_EFFECT if any effect reached must still be waited for, in
 * which case *pdelay is set to the shortest such delay; otherwise 0.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
			 */
			if (mark == 0) {
				/* unmarked arrows were never followed */
				if (ap->arrowp->arrow_marked == 0)
					continue;
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			ap->arrowp->arrow_marked = 1;
			/* skip head events whose fate is already cached */
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/* constraints are evaluated against ep2's payload */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, " default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push(" E");
				/* everything downstream waits on its parent */
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				/* requirements met: pass our mark downstream */
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    " EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    " PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push(" E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}

/*
 * effects_test -- would fault_event explain every observation?
 *
 * Marks the effects reachable from fault_event and then checks each
 * event on fmep->observations:
 *   - an observation marked CREDIBLE_EFFECT is explained
 *   - one marked PARENT_WAIT or WAIT_EFFECT may still be explained later
 *   - any other observation disproves the fault (FME_DISPROVED)
 * The marks are cleared again before returning; unless the fault was
 * disproved, the fault event and the events it reached are flagged
 * keep_in_tree.  Returns FME_CREDIBLE, FME_DISPROVED, or FME_WAIT with
 * *pdelay set to the shortest delay reported while marking.
 */
static enum fme_state
effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	struct event *error_event;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;

	stats_counter_bump(fmep->Ecallcount);
	indent_push(" E");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);

	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
	    &my_delay, 0) == WAIT_EFFECT) {
		return_value = FME_WAIT;
		if (overall_delay > my_delay)
			overall_delay = my_delay;
	}
	for (error_event = fmep->observations;
	    error_event; error_event = error_event->observations) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
			if (error_event->cached_state &
			    (PARENT_WAIT|WAIT_EFFECT)) {
				out(O_ALTFP|O_VERB, " NOT YET triggered");
				continue;
			}
			/* one unexplained observation is enough */
			return_value = FME_DISPROVED;
			out(O_ALTFP|O_VERB, " NOT triggered");
			break;
		} else {
			out(O_ALTFP|O_VERB, " triggered");
		}
	}
	/* clear the marks; keep the reached events unless disproved */
	if (return_value == FME_DISPROVED) {
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
	} else {
		fault_event->keep_in_tree = 1;
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
	}

	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	if (return_value == FME_WAIT)
		*pdelay = overall_delay;
	return (return_value);
}
3552 static enum fme_state 3553 requirements_test(struct fme *fmep, struct event *ep, 3554 unsigned long long at_latest_by, unsigned long long *pdelay) 3555 { 3556 int waiting_events; 3557 int credible_events; 3558 int deferred_events; 3559 enum fme_state return_value = FME_CREDIBLE; 3560 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3561 unsigned long long arrow_delay; 3562 unsigned long long my_delay; 3563 struct event *ep2; 3564 struct bubble *bp; 3565 struct arrowlist *ap; 3566 3567 if (ep->cached_state & REQMNTS_CREDIBLE) { 3568 indent(); 3569 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 3570 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3571 out(O_ALTFP|O_VERB, NULL); 3572 return (FME_CREDIBLE); 3573 } 3574 if (ep->cached_state & REQMNTS_DISPROVED) { 3575 indent(); 3576 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 3577 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3578 out(O_ALTFP|O_VERB, NULL); 3579 return (FME_DISPROVED); 3580 } 3581 if (ep->cached_state & REQMNTS_WAIT) { 3582 indent(); 3583 *pdelay = ep->cached_delay; 3584 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 3585 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3586 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 3587 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3588 out(O_ALTFP|O_VERB, NULL); 3589 return (FME_WAIT); 3590 } 3591 stats_counter_bump(fmep->Rcallcount); 3592 indent_push(" R"); 3593 indent(); 3594 out(O_ALTFP|O_VERB|O_NONL, "->"); 3595 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3596 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3597 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3598 out(O_ALTFP|O_VERB, NULL); 3599 3600 if (ep->t == N_EREPORT) { 3601 if (ep->count == 0) { 3602 if (fmep->pull >= at_latest_by) { 3603 return_value = FME_DISPROVED; 3604 } else { 3605 ep->cached_delay = *pdelay = at_latest_by; 3606 return_value = FME_WAIT; 3607 } 3608 } 3609 3610 indent(); 3611 switch (return_value) { 3612 case FME_CREDIBLE: 3613 
ep->cached_state |= REQMNTS_CREDIBLE; 3614 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 3615 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3616 break; 3617 case FME_DISPROVED: 3618 ep->cached_state |= REQMNTS_DISPROVED; 3619 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3620 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3621 break; 3622 case FME_WAIT: 3623 ep->cached_state |= REQMNTS_WAIT; 3624 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 3625 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3626 out(O_ALTFP|O_VERB|O_NONL, " to "); 3627 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3628 break; 3629 default: 3630 out(O_DIE, "requirements_test: unexpected fme_state"); 3631 break; 3632 } 3633 out(O_ALTFP|O_VERB, NULL); 3634 indent_pop(); 3635 3636 return (return_value); 3637 } 3638 3639 /* this event is not a report, descend the tree */ 3640 for (bp = itree_next_bubble(ep, NULL); bp; 3641 bp = itree_next_bubble(ep, bp)) { 3642 int n; 3643 3644 if (bp->t != B_FROM) 3645 continue; 3646 3647 n = bp->nork; 3648 3649 credible_events = 0; 3650 waiting_events = 0; 3651 deferred_events = 0; 3652 arrow_delay = TIMEVAL_EVENTUALLY; 3653 /* 3654 * n is -1 for 'A' so adjust it. 3655 * XXX just count up the arrows for now. 
3656 */ 3657 if (n < 0) { 3658 n = 0; 3659 for (ap = itree_next_arrow(bp, NULL); ap; 3660 ap = itree_next_arrow(bp, ap)) 3661 n++; 3662 indent(); 3663 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 3664 } else { 3665 indent(); 3666 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 3667 } 3668 3669 if (n == 0) 3670 continue; 3671 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 3672 for (ap = itree_next_arrow(bp, NULL); ap; 3673 ap = itree_next_arrow(bp, ap)) { 3674 ep2 = ap->arrowp->head->myevent; 3675 platform_set_payloadnvp(ep2->nvp); 3676 if (checkconstraints(fmep, ap->arrowp) == 0) { 3677 /* 3678 * if any arrow is invalidated by the 3679 * constraints, then we should elide the 3680 * whole bubble to be consistant with 3681 * the tree creation time behaviour 3682 */ 3683 bp->mark |= BUBBLE_ELIDED; 3684 platform_set_payloadnvp(NULL); 3685 break; 3686 } 3687 platform_set_payloadnvp(NULL); 3688 } 3689 } 3690 if (bp->mark & BUBBLE_ELIDED) 3691 continue; 3692 bp->mark |= BUBBLE_OK; 3693 for (ap = itree_next_arrow(bp, NULL); ap; 3694 ap = itree_next_arrow(bp, ap)) { 3695 ep2 = ap->arrowp->head->myevent; 3696 if (n <= credible_events) 3697 break; 3698 3699 ap->arrowp->mark |= REQMNTS_COUNTER; 3700 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 3701 /* XXX adding max timevals! 
*/ 3702 switch (requirements_test(fmep, ep2, 3703 at_latest_by + ap->arrowp->maxdelay, 3704 &my_delay)) { 3705 case FME_DEFERRED: 3706 deferred_events++; 3707 break; 3708 case FME_CREDIBLE: 3709 credible_events++; 3710 break; 3711 case FME_DISPROVED: 3712 break; 3713 case FME_WAIT: 3714 if (my_delay < arrow_delay) 3715 arrow_delay = my_delay; 3716 waiting_events++; 3717 break; 3718 default: 3719 out(O_DIE, 3720 "Bug in requirements_test."); 3721 } 3722 else 3723 deferred_events++; 3724 } 3725 indent(); 3726 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 3727 credible_events + deferred_events, waiting_events); 3728 if (credible_events + deferred_events + waiting_events < n) { 3729 /* Can never meet requirements */ 3730 ep->cached_state |= REQMNTS_DISPROVED; 3731 indent(); 3732 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3733 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3734 out(O_ALTFP|O_VERB, NULL); 3735 indent_pop(); 3736 return (FME_DISPROVED); 3737 } 3738 if (credible_events + deferred_events < n) { 3739 /* will have to wait */ 3740 /* wait time is shortest known */ 3741 if (arrow_delay < overall_delay) 3742 overall_delay = arrow_delay; 3743 return_value = FME_WAIT; 3744 } else if (credible_events < n) { 3745 if (return_value != FME_WAIT) 3746 return_value = FME_DEFERRED; 3747 } 3748 } 3749 3750 /* 3751 * don't mark as FME_DEFERRED. If this event isn't reached by another 3752 * path, then this will be considered FME_CREDIBLE. But if it is 3753 * reached by a different path so the K-count is met, then might 3754 * get overridden by FME_WAIT or FME_DISPROVED. 
3755 */ 3756 if (return_value == FME_WAIT) { 3757 ep->cached_state |= REQMNTS_WAIT; 3758 ep->cached_delay = *pdelay = overall_delay; 3759 } else if (return_value == FME_CREDIBLE) { 3760 ep->cached_state |= REQMNTS_CREDIBLE; 3761 } 3762 indent(); 3763 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3764 fme_state2str(return_value)); 3765 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3766 out(O_ALTFP|O_VERB, NULL); 3767 indent_pop(); 3768 return (return_value); 3769 } 3770 3771 static enum fme_state 3772 causes_test(struct fme *fmep, struct event *ep, 3773 unsigned long long at_latest_by, unsigned long long *pdelay) 3774 { 3775 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3776 unsigned long long my_delay; 3777 int credible_results = 0; 3778 int waiting_results = 0; 3779 enum fme_state fstate; 3780 struct event *tail_event; 3781 struct bubble *bp; 3782 struct arrowlist *ap; 3783 int k = 1; 3784 3785 stats_counter_bump(fmep->Ccallcount); 3786 indent_push(" C"); 3787 indent(); 3788 out(O_ALTFP|O_VERB|O_NONL, "->"); 3789 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3790 out(O_ALTFP|O_VERB, NULL); 3791 3792 for (bp = itree_next_bubble(ep, NULL); bp; 3793 bp = itree_next_bubble(ep, bp)) { 3794 if (bp->t != B_TO) 3795 continue; 3796 k = bp->nork; /* remember the K value */ 3797 for (ap = itree_next_arrow(bp, NULL); ap; 3798 ap = itree_next_arrow(bp, ap)) { 3799 int do_not_follow = 0; 3800 3801 /* 3802 * if we get to the same event multiple times 3803 * only worry about the first one. 
3804 */ 3805 if (ap->arrowp->tail->myevent->cached_state & 3806 CAUSES_TESTED) { 3807 indent(); 3808 out(O_ALTFP|O_VERB|O_NONL, 3809 " causes test already run for "); 3810 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3811 ap->arrowp->tail->myevent); 3812 out(O_ALTFP|O_VERB, NULL); 3813 continue; 3814 } 3815 3816 /* 3817 * see if false constraint prevents us 3818 * from traversing this arrow 3819 */ 3820 platform_set_payloadnvp(ep->nvp); 3821 if (checkconstraints(fmep, ap->arrowp) == 0) 3822 do_not_follow = 1; 3823 platform_set_payloadnvp(NULL); 3824 if (do_not_follow) { 3825 indent(); 3826 out(O_ALTFP|O_VERB|O_NONL, 3827 " False arrow from "); 3828 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3829 ap->arrowp->tail->myevent); 3830 out(O_ALTFP|O_VERB, NULL); 3831 continue; 3832 } 3833 3834 ap->arrowp->tail->myevent->cached_state |= 3835 CAUSES_TESTED; 3836 tail_event = ap->arrowp->tail->myevent; 3837 fstate = hypothesise(fmep, tail_event, at_latest_by, 3838 &my_delay); 3839 3840 switch (fstate) { 3841 case FME_WAIT: 3842 if (my_delay < overall_delay) 3843 overall_delay = my_delay; 3844 waiting_results++; 3845 break; 3846 case FME_CREDIBLE: 3847 credible_results++; 3848 break; 3849 case FME_DISPROVED: 3850 break; 3851 default: 3852 out(O_DIE, "Bug in causes_test"); 3853 } 3854 } 3855 } 3856 /* compare against K */ 3857 if (credible_results + waiting_results < k) { 3858 indent(); 3859 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3860 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3861 out(O_ALTFP|O_VERB, NULL); 3862 indent_pop(); 3863 return (FME_DISPROVED); 3864 } 3865 if (waiting_results != 0) { 3866 *pdelay = overall_delay; 3867 indent(); 3868 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3869 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3870 out(O_ALTFP|O_VERB|O_NONL, " to "); 3871 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3872 out(O_ALTFP|O_VERB, NULL); 3873 indent_pop(); 3874 return (FME_WAIT); 3875 } 3876 indent(); 3877 out(O_ALTFP|O_VERB|O_NONL, 
"<-CAUSES CREDIBLE "); 3878 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3879 out(O_ALTFP|O_VERB, NULL); 3880 indent_pop(); 3881 return (FME_CREDIBLE); 3882 } 3883 3884 static enum fme_state 3885 hypothesise(struct fme *fmep, struct event *ep, 3886 unsigned long long at_latest_by, unsigned long long *pdelay) 3887 { 3888 enum fme_state rtr, otr; 3889 unsigned long long my_delay; 3890 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3891 3892 stats_counter_bump(fmep->Hcallcount); 3893 indent_push(" H"); 3894 indent(); 3895 out(O_ALTFP|O_VERB|O_NONL, "->"); 3896 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3897 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3898 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3899 out(O_ALTFP|O_VERB, NULL); 3900 3901 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3902 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3903 overall_delay = my_delay; 3904 if (rtr != FME_DISPROVED) { 3905 if (is_problem(ep->t)) { 3906 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3907 if (otr != FME_DISPROVED) { 3908 if (fmep->peek == 0 && ep->is_suspect == 0) { 3909 ep->suspects = fmep->suspects; 3910 ep->is_suspect = 1; 3911 fmep->suspects = ep; 3912 fmep->nsuspects++; 3913 if (!is_fault(ep->t)) 3914 fmep->nonfault++; 3915 } 3916 } 3917 } else 3918 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3919 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3920 overall_delay = my_delay; 3921 if ((otr != FME_DISPROVED) && 3922 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3923 *pdelay = overall_delay; 3924 } 3925 if (rtr == FME_DISPROVED) { 3926 indent(); 3927 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3928 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3929 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3930 indent_pop(); 3931 return (FME_DISPROVED); 3932 } 3933 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3934 indent(); 3935 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3936 
itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3937 out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3938 indent_pop(); 3939 return (FME_DISPROVED); 3940 } 3941 if (otr == FME_DISPROVED) { 3942 indent(); 3943 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3944 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3945 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3946 indent_pop(); 3947 return (FME_DISPROVED); 3948 } 3949 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3950 indent(); 3951 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3952 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3953 out(O_ALTFP|O_VERB|O_NONL, " to "); 3954 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3955 out(O_ALTFP|O_VERB, NULL); 3956 indent_pop(); 3957 return (FME_WAIT); 3958 } 3959 indent(); 3960 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3961 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3962 out(O_ALTFP|O_VERB, NULL); 3963 indent_pop(); 3964 return (FME_CREDIBLE); 3965 } 3966 3967 /* 3968 * fme_istat_load -- reconstitute any persistent istats 3969 */ 3970 void 3971 fme_istat_load(fmd_hdl_t *hdl) 3972 { 3973 int sz; 3974 char *sbuf; 3975 char *ptr; 3976 3977 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3978 out(O_ALTFP, "fme_istat_load: No stats"); 3979 return; 3980 } 3981 3982 sbuf = alloca(sz); 3983 3984 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 3985 3986 /* 3987 * pick apart the serialized stats 3988 * 3989 * format is: 3990 * <class-name>, '@', <path>, '\0', <value>, '\0' 3991 * for example: 3992 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 3993 * 3994 * since this is parsing our own serialized data, any parsing issues 3995 * are fatal, so we check for them all with ASSERT() below. 
3996 */ 3997 ptr = sbuf; 3998 while (ptr < &sbuf[sz]) { 3999 char *sepptr; 4000 struct node *np; 4001 int val; 4002 4003 sepptr = strchr(ptr, '@'); 4004 ASSERT(sepptr != NULL); 4005 *sepptr = '\0'; 4006 4007 /* construct the event */ 4008 np = newnode(T_EVENT, NULL, 0); 4009 np->u.event.ename = newnode(T_NAME, NULL, 0); 4010 np->u.event.ename->u.name.t = N_STAT; 4011 np->u.event.ename->u.name.s = stable(ptr); 4012 np->u.event.ename->u.name.it = IT_ENAME; 4013 np->u.event.ename->u.name.last = np->u.event.ename; 4014 4015 ptr = sepptr + 1; 4016 ASSERT(ptr < &sbuf[sz]); 4017 ptr += strlen(ptr); 4018 ptr++; /* move past the '\0' separating path from value */ 4019 ASSERT(ptr < &sbuf[sz]); 4020 ASSERT(isdigit(*ptr)); 4021 val = atoi(ptr); 4022 ASSERT(val > 0); 4023 ptr += strlen(ptr); 4024 ptr++; /* move past the final '\0' for this entry */ 4025 4026 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 4027 ASSERT(np->u.event.epname != NULL); 4028 4029 istat_bump(np, val); 4030 tree_free(np); 4031 } 4032 4033 istat_save(); 4034 } 4035