1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 * 25 * fme.c -- fault management exercise module 26 * 27 * this module provides the simulated fault management exercise. 28 */ 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <strings.h> 34 #include <ctype.h> 35 #include <alloca.h> 36 #include <libnvpair.h> 37 #include <sys/fm/protocol.h> 38 #include <fm/fmd_api.h> 39 #include "alloc.h" 40 #include "out.h" 41 #include "stats.h" 42 #include "stable.h" 43 #include "literals.h" 44 #include "lut.h" 45 #include "tree.h" 46 #include "ptree.h" 47 #include "itree.h" 48 #include "ipath.h" 49 #include "fme.h" 50 #include "evnv.h" 51 #include "eval.h" 52 #include "config.h" 53 #include "platform.h" 54 #include "esclex.h" 55 56 /* imported from eft.c... */ 57 extern hrtime_t Hesitate; 58 extern char *Serd_Override; 59 extern nv_alloc_t Eft_nv_hdl; 60 extern int Max_fme; 61 extern fmd_hdl_t *Hdl; 62 63 static int Istat_need_save; 64 static int Serd_need_save; 65 void istat_save(void); 66 void serd_save(void); 67 68 /* fme under construction is global so we can free it on module abort */ 69 static struct fme *Nfmep; 70 71 static int Undiag_reason = UD_VAL_UNKNOWN; 72 73 static int Nextid = 0; 74 75 static int Open_fme_count = 0; /* Count of open FMEs */ 76 77 /* list of fault management exercises underway */ 78 static struct fme { 79 struct fme *next; /* next exercise */ 80 unsigned long long ull; /* time when fme was created */ 81 int id; /* FME id */ 82 struct config *config; /* cooked configuration data */ 83 struct lut *eventtree; /* propagation tree for this FME */ 84 /* 85 * The initial error report that created this FME is kept in 86 * two forms. e0 points to the instance tree node and is used 87 * by fme_eval() as the starting point for the inference 88 * algorithm. e0r is the event handle FMD passed to us when 89 * the ereport first arrived and is used when setting timers, 90 * which are always relative to the time of this initial 91 * report. 
92 */ 93 struct event *e0; 94 fmd_event_t *e0r; 95 96 id_t timer; /* for setting an fmd time-out */ 97 98 struct event *ecurrent; /* ereport under consideration */ 99 struct event *suspects; /* current suspect list */ 100 struct event *psuspects; /* previous suspect list */ 101 int nsuspects; /* count of suspects */ 102 int posted_suspects; /* true if we've posted a diagnosis */ 103 int uniqobs; /* number of unique events observed */ 104 int peek; /* just peeking, don't track suspects */ 105 int overflow; /* true if overflow FME */ 106 enum fme_state { 107 FME_NOTHING = 5000, /* not evaluated yet */ 108 FME_WAIT, /* need to wait for more info */ 109 FME_CREDIBLE, /* suspect list is credible */ 110 FME_DISPROVED, /* no valid suspects found */ 111 FME_DEFERRED /* don't know yet (k-count not met) */ 112 } state; 113 114 unsigned long long pull; /* time passed since created */ 115 unsigned long long wull; /* wait until this time for re-eval */ 116 struct event *observations; /* observation list */ 117 struct lut *globals; /* values of global variables */ 118 /* fmd interfacing */ 119 fmd_hdl_t *hdl; /* handle for talking with fmd */ 120 fmd_case_t *fmcase; /* what fmd 'case' we associate with */ 121 /* stats */ 122 struct stats *Rcount; 123 struct stats *Hcallcount; 124 struct stats *Rcallcount; 125 struct stats *Ccallcount; 126 struct stats *Ecallcount; 127 struct stats *Tcallcount; 128 struct stats *Marrowcount; 129 struct stats *diags; 130 } *FMElist, *EFMElist, *ClosedFMEs; 131 132 static struct case_list { 133 fmd_case_t *fmcase; 134 struct case_list *next; 135 } *Undiagablecaselist; 136 137 static void fme_eval(struct fme *fmep, fmd_event_t *ffep); 138 static enum fme_state hypothesise(struct fme *fmep, struct event *ep, 139 unsigned long long at_latest_by, unsigned long long *pdelay); 140 static struct node *eventprop_lookup(struct event *ep, const char *propname); 141 static struct node *pathstring2epnamenp(char *path); 142 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, 143 fmd_case_t *fmcase, nvlist_t *detector, char *arg); 144 static char *undiag_2reason_str(int ud, char *arg); 145 static const char *undiag_2defect_str(int ud); 146 static void restore_suspects(struct fme *fmep); 147 static void save_suspects(struct fme *fmep); 148 static void destroy_fme(struct fme *f); 149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 150 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 151 static void istat_counter_reset_cb(struct istat_entry *entp, 152 struct stats *statp, const struct ipath *ipp); 153 static void istat_counter_topo_chg_cb(struct istat_entry *entp, 154 struct stats *statp, void *unused); 155 static void serd_reset_cb(struct serd_entry *entp, void *unused, 156 const struct ipath *ipp); 157 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused, 158 void *unused2); 159 static void destroy_fme_bufs(struct fme *fp); 160 161 static struct fme * 162 alloc_fme(void) 163 { 164 struct fme *fmep; 165 166 fmep = MALLOC(sizeof (*fmep)); 167 bzero(fmep, sizeof (*fmep)); 168 return (fmep); 169 } 170 171 /* 172 * fme_ready -- called when all initialization of the FME (except for 173 * stats) has completed successfully. Adds the fme to global lists 174 * and establishes its stats. 
175 */ 176 static struct fme * 177 fme_ready(struct fme *fmep) 178 { 179 char nbuf[100]; 180 181 Nfmep = NULL; /* don't need to free this on module abort now */ 182 183 if (EFMElist) { 184 EFMElist->next = fmep; 185 EFMElist = fmep; 186 } else 187 FMElist = EFMElist = fmep; 188 189 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 190 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 191 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 192 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 193 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 194 fmep->Rcallcount = stats_new_counter(nbuf, 195 "calls to requirements_test()", 1); 196 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 197 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 198 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 199 fmep->Ecallcount = 200 stats_new_counter(nbuf, "calls to effects_test()", 1); 201 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 202 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 203 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 204 fmep->Marrowcount = stats_new_counter(nbuf, 205 "arrows marked by mark_arrows()", 1); 206 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 207 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 208 209 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 210 config_print(O_ALTFP|O_VERB2, fmep->config); 211 212 return (fmep); 213 } 214 215 extern void ipath_dummy_lut(struct arrow *); 216 extern struct lut *itree_create_dummy(const char *, const struct ipath *); 217 218 /* ARGSUSED */ 219 static void 220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 221 { 222 struct bubble *bp; 223 struct arrowlist *ap; 224 225 for (bp = itree_next_bubble(ep, NULL); bp; 226 bp = itree_next_bubble(ep, bp)) { 227 if (bp->t != B_FROM) 228 continue; 229 for (ap = itree_next_arrow(bp, NULL); ap; 230 ap = itree_next_arrow(bp, ap)) { 231 ap->arrowp->pnode->u.arrow.needed = 1; 232 ipath_dummy_lut(ap->arrowp); 233 } 234 } 235 } 236 237 /* ARGSUSED */ 238 static void 239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 240 { 241 struct bubble *bp; 242 struct arrowlist *ap; 243 244 for (bp = itree_next_bubble(ep, NULL); bp; 245 bp = itree_next_bubble(ep, bp)) { 246 if (bp->t != B_FROM) 247 continue; 248 for (ap = itree_next_arrow(bp, NULL); ap; 249 ap = itree_next_arrow(bp, ap)) 250 ap->arrowp->pnode->u.arrow.needed = 0; 251 } 252 } 253 254 static void globals_destructor(void *left, void *right, void *arg); 255 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep); 256 257 static boolean_t 258 prune_propagations(const char *e0class, const struct ipath *e0ipp) 259 { 260 char nbuf[100]; 261 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 262 extern struct lut *Usednames; 263 264 Nfmep = alloc_fme(); 265 Nfmep->id = Nextid; 266 Nfmep->state = FME_NOTHING; 267 Nfmep->eventtree = itree_create_dummy(e0class, e0ipp); 268 if ((Nfmep->e0 = 269 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 270 itree_free(Nfmep->eventtree); 271 FREE(Nfmep); 272 Nfmep = NULL; 273 return (B_FALSE); 274 } 275 Nfmep->ecurrent = Nfmep->observations = Nfmep->e0; 276 Nfmep->e0->count++; 277 278 (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id); 279 Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 280 (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id); 281 Nfmep->Hcallcount = 282 stats_new_counter(nbuf, "calls to hypothesise()", 1); 283 
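	/*
	 * These per-FME counters exist only for the duration of this
	 * pruning pass; they are deleted again before prune_propagations()
	 * returns.
	 */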
(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id); 284 Nfmep->Rcallcount = stats_new_counter(nbuf, 285 "calls to requirements_test()", 1); 286 (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id); 287 Nfmep->Ccallcount = 288 stats_new_counter(nbuf, "calls to causes_test()", 1); 289 (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id); 290 Nfmep->Ecallcount = 291 stats_new_counter(nbuf, "calls to effects_test()", 1); 292 (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id); 293 Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 294 (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id); 295 Nfmep->Marrowcount = stats_new_counter(nbuf, 296 "arrows marked by mark_arrows()", 1); 297 (void) sprintf(nbuf, "fme%d.diags", Nfmep->id); 298 Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 299 300 Nfmep->peek = 1; 301 lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep); 302 lut_free(Usednames, NULL, NULL); 303 Usednames = NULL; 304 lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep); 305 (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay); 306 itree_prune(Nfmep->eventtree); 307 lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep); 308 309 stats_delete(Nfmep->Rcount); 310 stats_delete(Nfmep->Hcallcount); 311 stats_delete(Nfmep->Rcallcount); 312 stats_delete(Nfmep->Ccallcount); 313 stats_delete(Nfmep->Ecallcount); 314 stats_delete(Nfmep->Tcallcount); 315 stats_delete(Nfmep->Marrowcount); 316 stats_delete(Nfmep->diags); 317 itree_free(Nfmep->eventtree); 318 lut_free(Nfmep->globals, globals_destructor, NULL); 319 FREE(Nfmep); 320 return (B_TRUE); 321 } 322 323 static struct fme * 324 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl, 325 fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl) 326 { 327 struct cfgdata *cfgdata; 328 int init_size; 329 extern int alloc_total(); 330 nvlist_t *detector = NULL; 331 char *pathstr; 332 char *arg; 333 334 /* 335 * First check if e0ipp is actually in the topology so we can give a 336 * more useful error message. 337 */ 338 ipathlastcomp(e0ipp); 339 pathstr = ipath2str(NULL, e0ipp); 340 cfgdata = config_snapshot(); 341 platform_units_translate(0, cfgdata->cooked, NULL, NULL, 342 &detector, pathstr); 343 FREE(pathstr); 344 structconfig_free(cfgdata->cooked); 345 config_free(cfgdata); 346 if (detector == NULL) { 347 Undiag_reason = UD_VAL_BADEVENTPATH; 348 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, 349 &detector); 350 arg = ipath2str(e0class, e0ipp); 351 publish_undiagnosable(hdl, ffep, fmcase, detector, arg); 352 FREE(arg); 353 return (NULL); 354 } 355 356 /* 357 * Next run a quick first pass of the rules with a dummy config. This 358 * allows us to prune those rules which can't possibly cause this 359 * ereport. 360 */ 361 if (!prune_propagations(e0class, e0ipp)) { 362 /* 363 * The fault class must have been in the rules or we would 364 * not have registered for it (and got a "nosub"), and the 365 * pathname must be in the topology or we would have failed the 366 * previous test. So to get here means the combination of 367 * class and pathname in the ereport must be invalid. 368 */ 369 Undiag_reason = UD_VAL_BADEVENTCLASS; 370 arg = ipath2str(e0class, e0ipp); 371 publish_undiagnosable(hdl, ffep, fmcase, detector, arg); 372 nvlist_free(detector); 373 FREE(arg); 374 return (NULL); 375 } 376 377 /* 378 * Now go ahead and create the real fme using the pruned rules. 
	 */
	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
	cfgdata = config_snapshot();
	platform_save_config(hdl, fmcase);
	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
	    alloc_total() - init_size);

	Nfmep = alloc_fme();

	Nfmep->id = Nextid++;
	Nfmep->config = cfgdata->cooked;
	config_free(cfgdata);
	Nfmep->posted_suspects = 0;
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		Undiag_reason = UD_VAL_INSTFAIL;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		Undiag_reason = UD_VAL_BADEVENTI;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	nvlist_free(detector);
	return (fme_ready(Nfmep));
}

void
fme_fini(void)
{
	struct fme *sfp, *fp;
	struct case_list *ucasep, *nextcasep;

	ucasep = Undiagablecaselist;
	while (ucasep != NULL) {
		nextcasep = ucasep->next;
		FREE(ucasep);
		ucasep = nextcasep;
	}
	Undiagablecaselist = NULL;

	/* clean up closed fmes */
	fp = ClosedFMEs;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	ClosedFMEs = NULL;

	fp = FMElist;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	FMElist = EFMElist = NULL;

	/* if we were in the middle of creating an fme, free it now */
	if (Nfmep) {
		destroy_fme(Nfmep);
		Nfmep = NULL;
	}
}

/*
 * Allocate space for a buffer name.  20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ	20

/*
 * serialize_observation
 *
 * Create a recoverable version of the current observation
 * (f->ecurrent).  We keep a serialized version of each unique
 * observation so that we can resume the fme in the correct state
 * if eft or fmd crashes and we're restarted.
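 * Two fmd buffers are used per observation: "observed<n>" holds the
 * event's class@path string and "observed<n>.nvp" holds the XDR-packed
 * nvlist payload, if the ereport carried one.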
 */
static void
serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
{
	size_t pkdlen;
	char tmpbuf[OBBUFNMSZ];
	char *pkd = NULL;
	char *estr;

	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
	estr = ipath2str(cls, ipp);
	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
	    strlen(estr) + 1);
	FREE(estr);

	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
		(void) snprintf(tmpbuf,
		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
		if (nvlist_xpack(fp->ecurrent->nvp,
		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
			out(O_DIE|O_SYS, "pack of observed nvl failed");
		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
		FREE(pkd);
	}

	fp->uniqobs++;
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));
}

/*
 * init_fme_bufs -- We keep several bits of state about an fme for
 * use if eft or fmd crashes and we're restarted.
 */
static void
init_fme_bufs(struct fme *fp)
{
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
	    sizeof (fp->pull));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
	    sizeof (fp->id));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    sizeof (fp->posted_suspects));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
}

static void
destroy_fme_bufs(struct fme *fp)
{
	char tmpbuf[OBBUFNMSZ];
	int o;

	platform_restore_config(fp->hdl, fp->fmcase);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);

	for (o = 0; o < fp->uniqobs; o++) {
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
	}
}

/*
 * reconstitute_observations -- convert a case's serialized observations
 * back into struct events.  Returns zero if all observations are
 * successfully reconstituted.
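 * This is the read-side counterpart of serialize_observation(): each
 * "observed<n>" buffer is read back, split at its '@' separator into a
 * class and a path, and looked up in the instance tree.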
 */
static int
reconstitute_observations(struct fme *fmep)
{
	struct event *ep;
	struct node *epnamenp = NULL;
	size_t pkdlen;
	char *pkd = NULL;
	char *tmpbuf = alloca(OBBUFNMSZ);
	char *sepptr;
	char *estr;
	int ocnt;
	int elen;

	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
		if (elen == 0) {
			out(O_ALTFP,
			    "reconstitute_observation: no %s buffer found.",
			    tmpbuf);
			Undiag_reason = UD_VAL_MISSINGOBS;
			break;
		}

		estr = MALLOC(elen);
		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
		sepptr = strchr(estr, '@');
		if (sepptr == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "missing @ separator in %s.",
			    tmpbuf, estr);
			Undiag_reason = UD_VAL_MISSINGPATH;
			FREE(estr);
			break;
		}

		*sepptr = '\0';
		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "trouble converting path string \"%s\" "
			    "to internal representation.",
			    tmpbuf, sepptr + 1);
			Undiag_reason = UD_VAL_MISSINGPATH;
			FREE(estr);
			break;
		}

		/* construct the event */
		ep = itree_lookup(fmep->eventtree,
		    stable(estr), ipath(epnamenp));
		if (ep == NULL) {
			out(O_ALTFP,
			    "reconstitute_observation: %s: "
			    "lookup of \"%s\" in itree failed.",
			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
			Undiag_reason = UD_VAL_BADOBS;
			tree_free(epnamenp);
			FREE(estr);
			break;
		}
		tree_free(epnamenp);

		/*
		 * We may or may not have a saved nvlist for the observation
		 */
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
		if (pkdlen != 0) {
			pkd = MALLOC(pkdlen);
			fmd_buf_read(fmep->hdl,
			    fmep->fmcase, tmpbuf, pkd, pkdlen);
			ASSERT(ep->nvp == NULL);
			if (nvlist_xunpack(pkd,
			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
				out(O_DIE|O_SYS,
				    "unpack of observed nvl failed");
			FREE(pkd);
		}

		if (ocnt == 0)
			fmep->e0 = ep;

		FREE(estr);
		fmep->ecurrent = ep;
		ep->count++;

		/* link it into list of observations seen */
		ep->observations = fmep->observations;
		fmep->observations = ep;
	}

	if (ocnt == fmep->uniqobs) {
		(void) fme_ready(fmep);
		return (0);
	}

	return (1);
}

/*
 * fme_restart -- called during eft initialization.  Reconstitutes
 * an in-progress fme.
673 */ 674 void 675 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 676 { 677 nvlist_t *defect; 678 struct case_list *bad; 679 struct fme *fmep; 680 struct cfgdata *cfgdata; 681 size_t rawsz; 682 struct event *ep; 683 char *tmpbuf = alloca(OBBUFNMSZ); 684 char *sepptr; 685 char *estr; 686 int elen; 687 struct node *epnamenp = NULL; 688 int init_size; 689 extern int alloc_total(); 690 char *reason; 691 692 /* 693 * ignore solved or closed cases 694 */ 695 if (fmd_case_solved(hdl, inprogress) || 696 fmd_case_closed(hdl, inprogress)) 697 return; 698 699 fmep = alloc_fme(); 700 fmep->fmcase = inprogress; 701 fmep->hdl = hdl; 702 703 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 704 out(O_ALTFP, "restart_fme: no saved posted status"); 705 Undiag_reason = UD_VAL_MISSINGINFO; 706 goto badcase; 707 } else { 708 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 709 (void *)&fmep->posted_suspects, 710 sizeof (fmep->posted_suspects)); 711 } 712 713 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 714 out(O_ALTFP, "restart_fme: no saved id"); 715 Undiag_reason = UD_VAL_MISSINGINFO; 716 goto badcase; 717 } else { 718 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 719 sizeof (fmep->id)); 720 } 721 if (Nextid <= fmep->id) 722 Nextid = fmep->id + 1; 723 724 out(O_ALTFP, "Replay FME %d", fmep->id); 725 726 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 727 out(O_ALTFP, "restart_fme: No config data"); 728 Undiag_reason = UD_VAL_MISSINGINFO; 729 goto badcase; 730 } 731 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 732 sizeof (size_t)); 733 734 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 735 out(O_ALTFP, "restart_fme: No event zero"); 736 Undiag_reason = UD_VAL_MISSINGZERO; 737 goto badcase; 738 } 739 740 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 741 out(O_ALTFP, "restart_fme: no saved wait time"); 742 Undiag_reason = UD_VAL_MISSINGINFO; 743 goto badcase; 744 } else { 745 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 746 sizeof (fmep->pull)); 747 } 748 749 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 750 out(O_ALTFP, "restart_fme: no count of observations"); 751 Undiag_reason = UD_VAL_MISSINGINFO; 752 goto badcase; 753 } else { 754 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 755 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 756 } 757 758 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0"); 759 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 760 if (elen == 0) { 761 out(O_ALTFP, "reconstitute_observation: no %s buffer found.", 762 tmpbuf); 763 Undiag_reason = UD_VAL_MISSINGOBS; 764 goto badcase; 765 } 766 estr = MALLOC(elen); 767 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 768 sepptr = strchr(estr, '@'); 769 if (sepptr == NULL) { 770 out(O_ALTFP, "reconstitute_observation: %s: " 771 "missing @ separator in %s.", 772 tmpbuf, estr); 773 Undiag_reason = UD_VAL_MISSINGPATH; 774 FREE(estr); 775 goto badcase; 776 } 777 *sepptr = '\0'; 778 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 779 out(O_ALTFP, "reconstitute_observation: %s: " 780 "trouble converting path string \"%s\" " 781 "to internal representation.", tmpbuf, sepptr + 1); 782 Undiag_reason = UD_VAL_MISSINGPATH; 783 FREE(estr); 784 goto badcase; 785 } 786 (void) prune_propagations(stable(estr), ipath(epnamenp)); 787 tree_free(epnamenp); 788 FREE(estr); 789 790 init_size = alloc_total(); 791 out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size); 792 cfgdata = MALLOC(sizeof (struct cfgdata)); 
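	/*
	 * Rebuild the config snapshot by hand; the raw snapshot data, if
	 * any, is read back from the WOBUF_CFG buffer and cooked below.
	 */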
793 cfgdata->cooked = NULL; 794 cfgdata->devcache = NULL; 795 cfgdata->devidcache = NULL; 796 cfgdata->tpcache = NULL; 797 cfgdata->cpucache = NULL; 798 cfgdata->raw_refcnt = 1; 799 800 if (rawsz > 0) { 801 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 802 out(O_ALTFP, "restart_fme: Config data size mismatch"); 803 Undiag_reason = UD_VAL_CFGMISMATCH; 804 goto badcase; 805 } 806 cfgdata->begin = MALLOC(rawsz); 807 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 808 fmd_buf_read(hdl, 809 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 810 } else { 811 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 812 } 813 814 config_cook(cfgdata); 815 fmep->config = cfgdata->cooked; 816 config_free(cfgdata); 817 out(O_ALTFP|O_STAMP, "config_restore added %d bytes", 818 alloc_total() - init_size); 819 820 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) { 821 /* case not properly saved or irretrievable */ 822 out(O_ALTFP, "restart_fme: NULL instance tree"); 823 Undiag_reason = UD_VAL_INSTFAIL; 824 goto badcase; 825 } 826 827 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 828 829 if (reconstitute_observations(fmep) != 0) 830 goto badcase; 831 832 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id); 833 for (ep = fmep->observations; ep; ep = ep->observations) { 834 out(O_ALTFP|O_NONL, " "); 835 itree_pevent_brief(O_ALTFP|O_NONL, ep); 836 } 837 out(O_ALTFP, NULL); 838 839 Open_fme_count++; 840 841 /* give the diagnosis algorithm a shot at the new FME state */ 842 fme_eval(fmep, fmep->e0r); 843 return; 844 845 badcase: 846 if (fmep->eventtree != NULL) 847 itree_free(fmep->eventtree); 848 if (fmep->config) 849 structconfig_free(fmep->config); 850 destroy_fme_bufs(fmep); 851 FREE(fmep); 852 853 /* 854 * Since we're unable to restart the case, add it to the undiagable 855 * list and solve and close it as appropriate. 
856 */ 857 bad = MALLOC(sizeof (struct case_list)); 858 bad->next = NULL; 859 860 if (Undiagablecaselist != NULL) 861 bad->next = Undiagablecaselist; 862 Undiagablecaselist = bad; 863 bad->fmcase = inprogress; 864 865 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ", 866 fmd_case_uuid(hdl, bad->fmcase)); 867 868 if (fmd_case_solved(hdl, bad->fmcase)) { 869 out(O_ALTFP|O_NONL, "already solved, "); 870 } else { 871 out(O_ALTFP|O_NONL, "solving, "); 872 defect = fmd_nvl_create_fault(hdl, 873 undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL); 874 reason = undiag_2reason_str(Undiag_reason, NULL); 875 (void) nvlist_add_string(defect, UNDIAG_REASON, reason); 876 FREE(reason); 877 fmd_case_add_suspect(hdl, bad->fmcase, defect); 878 fmd_case_solve(hdl, bad->fmcase); 879 Undiag_reason = UD_VAL_UNKNOWN; 880 } 881 882 if (fmd_case_closed(hdl, bad->fmcase)) { 883 out(O_ALTFP, "already closed ]"); 884 } else { 885 out(O_ALTFP, "closing ]"); 886 fmd_case_close(hdl, bad->fmcase); 887 } 888 } 889 890 /*ARGSUSED*/ 891 static void 892 globals_destructor(void *left, void *right, void *arg) 893 { 894 struct evalue *evp = (struct evalue *)right; 895 if (evp->t == NODEPTR) 896 tree_free((struct node *)(uintptr_t)evp->v); 897 evp->v = (uintptr_t)NULL; 898 FREE(evp); 899 } 900 901 void 902 destroy_fme(struct fme *f) 903 { 904 stats_delete(f->Rcount); 905 stats_delete(f->Hcallcount); 906 stats_delete(f->Rcallcount); 907 stats_delete(f->Ccallcount); 908 stats_delete(f->Ecallcount); 909 stats_delete(f->Tcallcount); 910 stats_delete(f->Marrowcount); 911 stats_delete(f->diags); 912 913 if (f->eventtree != NULL) 914 itree_free(f->eventtree); 915 if (f->config) 916 structconfig_free(f->config); 917 lut_free(f->globals, globals_destructor, NULL); 918 FREE(f); 919 } 920 921 static const char * 922 fme_state2str(enum fme_state s) 923 { 924 switch (s) { 925 case FME_NOTHING: return ("NOTHING"); 926 case FME_WAIT: return ("WAIT"); 927 case FME_CREDIBLE: return ("CREDIBLE"); 928 case FME_DISPROVED: return ("DISPROVED"); 929 case FME_DEFERRED: return ("DEFERRED"); 930 default: return ("UNKNOWN"); 931 } 932 } 933 934 static int 935 is_problem(enum nametype t) 936 { 937 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 938 } 939 940 static int 941 is_defect(enum nametype t) 942 { 943 return (t == N_DEFECT); 944 } 945 946 static int 947 is_upset(enum nametype t) 948 { 949 return (t == N_UPSET); 950 } 951 952 static void 953 fme_print(int flags, struct fme *fmep) 954 { 955 struct event *ep; 956 957 out(flags, "Fault Management Exercise %d", fmep->id); 958 out(flags, "\t State: %s", fme_state2str(fmep->state)); 959 out(flags|O_NONL, "\t Start time: "); 960 ptree_timeval(flags|O_NONL, &fmep->ull); 961 out(flags, NULL); 962 if (fmep->wull) { 963 out(flags|O_NONL, "\t Wait time: "); 964 ptree_timeval(flags|O_NONL, &fmep->wull); 965 out(flags, NULL); 966 } 967 out(flags|O_NONL, "\t E0: "); 968 if (fmep->e0) 969 itree_pevent_brief(flags|O_NONL, fmep->e0); 970 else 971 out(flags|O_NONL, "NULL"); 972 out(flags, NULL); 973 out(flags|O_NONL, "\tObservations:"); 974 for (ep = fmep->observations; ep; ep = ep->observations) { 975 out(flags|O_NONL, " "); 976 itree_pevent_brief(flags|O_NONL, ep); 977 } 978 out(flags, NULL); 979 out(flags|O_NONL, "\tSuspect list:"); 980 for (ep = fmep->suspects; ep; ep = ep->suspects) { 981 out(flags|O_NONL, " "); 982 itree_pevent_brief(flags|O_NONL, ep); 983 } 984 out(flags, NULL); 985 if (fmep->eventtree != NULL) { 986 out(flags|O_VERB2, "\t Tree:"); 987 itree_ptree(flags|O_VERB2, 
fmep->eventtree); 988 } 989 } 990 991 static struct node * 992 pathstring2epnamenp(char *path) 993 { 994 char *sep = "/"; 995 struct node *ret; 996 char *ptr; 997 998 if ((ptr = strtok(path, sep)) == NULL) 999 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 1000 1001 ret = tree_iname(stable(ptr), NULL, 0); 1002 1003 while ((ptr = strtok(NULL, sep)) != NULL) 1004 ret = tree_name_append(ret, 1005 tree_iname(stable(ptr), NULL, 0)); 1006 1007 return (ret); 1008 } 1009 1010 /* 1011 * for a given upset sp, increment the corresponding SERD engine. if the 1012 * SERD engine trips, return the ename and ipp of the resulting ereport. 1013 * returns true if engine tripped and *enamep and *ippp were filled in. 1014 */ 1015 static int 1016 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 1017 fmd_case_t *fmcase, struct event *sp, const char **enamep, 1018 const struct ipath **ippp) 1019 { 1020 struct node *serdinst; 1021 char *serdname; 1022 char *serdresource; 1023 char *serdclass; 1024 struct node *nid; 1025 struct serd_entry *newentp; 1026 int i, serdn = -1, serdincrement = 1, len = 0; 1027 char *serdsuffix = NULL, *serdt = NULL; 1028 struct evalue *ep; 1029 1030 ASSERT(sp->t == N_UPSET); 1031 ASSERT(ffep != NULL); 1032 1033 if ((ep = (struct evalue *)lut_lookup(sp->serdprops, 1034 (void *)"n", (lut_cmp)strcmp)) != NULL) { 1035 ASSERT(ep->t == UINT64); 1036 serdn = (int)ep->v; 1037 } 1038 if ((ep = (struct evalue *)lut_lookup(sp->serdprops, 1039 (void *)"t", (lut_cmp)strcmp)) != NULL) { 1040 ASSERT(ep->t == STRING); 1041 serdt = (char *)(uintptr_t)ep->v; 1042 } 1043 if ((ep = (struct evalue *)lut_lookup(sp->serdprops, 1044 (void *)"suffix", (lut_cmp)strcmp)) != NULL) { 1045 ASSERT(ep->t == STRING); 1046 serdsuffix = (char *)(uintptr_t)ep->v; 1047 } 1048 if ((ep = (struct evalue *)lut_lookup(sp->serdprops, 1049 (void *)"increment", (lut_cmp)strcmp)) != NULL) { 1050 ASSERT(ep->t == UINT64); 1051 serdincrement = (int)ep->v; 1052 } 1053 1054 /* 1055 * obtain instanced SERD engine from the upset sp. from this 1056 * derive serdname, the string used to identify the SERD engine. 
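	 * serdname takes the form "<class><suffix>@<resource-path>[:<id>]",
	 * where the optional suffix comes from the upset's "suffix" serd
	 * property and the :<id> part from the engine's "id" declaration.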
	 */
	serdinst = eventprop_lookup(sp, L_engine);

	if (serdinst == NULL)
		return (-1);

	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
	if (serdsuffix != NULL)
		len += strlen(serdsuffix);
	serdclass = MALLOC(len);
	if (serdsuffix != NULL)
		(void) snprintf(serdclass, len, "%s%s",
		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
	else
		(void) snprintf(serdclass, len, "%s",
		    serdinst->u.stmt.np->u.event.ename->u.name.s);
	serdresource = ipath2str(NULL,
	    ipath(serdinst->u.stmt.np->u.event.epname));
	len += strlen(serdresource) + 1;
	serdname = MALLOC(len);
	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
	FREE(serdresource);

	/* handle serd engine "id" property, if there is one */
	if ((nid =
	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
		struct evalue *gval;
		char suffixbuf[200];
		char *suffix;
		char *nserdname;
		size_t nname;

		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
		ptree_name_iter(O_ALTFP|O_NONL, nid);

		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));

		if ((gval = lut_lookup(fmep->globals,
		    (void *)nid->u.globid.s, NULL)) == NULL) {
			out(O_ALTFP, " undefined");
		} else if (gval->t == UINT64) {
			out(O_ALTFP, " %llu", gval->v);
			(void) sprintf(suffixbuf, "%llu", gval->v);
			suffix = suffixbuf;
		} else {
			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
			suffix = (char *)(uintptr_t)gval->v;
		}

		nname = strlen(serdname) + strlen(suffix) + 2;
		nserdname = MALLOC(nname);
		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
		FREE(serdname);
		serdname = nserdname;
	}

	/*
	 * if the engine is empty, and we have an override for n/t then
	 * destroy and recreate it.
	 */
	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
	    fmd_serd_empty(hdl, serdname))
		fmd_serd_destroy(hdl, serdname);

	if (!fmd_serd_exists(hdl, serdname)) {
		struct node *nN, *nT;
		const char *s;
		struct node *nodep;
		struct config *cp;
		char *path;
		uint_t nval;
		hrtime_t tval;
		int i;
		char *ptr;
		int got_n_override = 0, got_t_override = 0;

		/* no SERD engine yet, so create it */
		nodep = serdinst->u.stmt.np->u.event.epname;
		path = ipath2str(NULL, ipath(nodep));
		cp = config_lookup(fmep->config, path, 0);
		FREE((void *)path);

		/*
		 * We allow serd parameters to be overridden, either from
		 * eft.conf file values (if Serd_Override is set) or from
		 * driver properties (for "serd.io.device" engines).
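		 * The override string is parsed below as space-separated
		 * entries of the form "<class>,<N>,<T>", where N is a count
		 * and T a time value with an optional suffix looked up in
		 * Timesuffixlut.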
1143 */ 1144 if (Serd_Override != NULL) { 1145 char *save_ptr, *ptr1, *ptr2, *ptr3; 1146 ptr3 = save_ptr = STRDUP(Serd_Override); 1147 while (*ptr3 != '\0') { 1148 ptr1 = strchr(ptr3, ','); 1149 *ptr1 = '\0'; 1150 if (strcmp(ptr3, serdclass) == 0) { 1151 ptr2 = strchr(ptr1 + 1, ','); 1152 *ptr2 = '\0'; 1153 nval = atoi(ptr1 + 1); 1154 out(O_ALTFP, "serd override %s_n %d", 1155 serdclass, nval); 1156 ptr3 = strchr(ptr2 + 1, ' '); 1157 if (ptr3) 1158 *ptr3 = '\0'; 1159 ptr = STRDUP(ptr2 + 1); 1160 out(O_ALTFP, "serd override %s_t %s", 1161 serdclass, ptr); 1162 got_n_override = 1; 1163 got_t_override = 1; 1164 break; 1165 } else { 1166 ptr2 = strchr(ptr1 + 1, ','); 1167 ptr3 = strchr(ptr2 + 1, ' '); 1168 if (ptr3 == NULL) 1169 break; 1170 } 1171 ptr3++; 1172 } 1173 FREE(save_ptr); 1174 } 1175 1176 if (cp && got_n_override == 0) { 1177 /* 1178 * convert serd engine class into property name 1179 */ 1180 char *prop_name = MALLOC(strlen(serdclass) + 3); 1181 for (i = 0; i < strlen(serdclass); i++) { 1182 if (serdclass[i] == '.') 1183 prop_name[i] = '_'; 1184 else 1185 prop_name[i] = serdclass[i]; 1186 } 1187 prop_name[i++] = '_'; 1188 prop_name[i++] = 'n'; 1189 prop_name[i] = '\0'; 1190 if (s = config_getprop(cp, prop_name)) { 1191 nval = atoi(s); 1192 out(O_ALTFP, "serd override %s_n %s", 1193 serdclass, s); 1194 got_n_override = 1; 1195 } 1196 prop_name[i - 1] = 't'; 1197 if (s = config_getprop(cp, prop_name)) { 1198 ptr = STRDUP(s); 1199 out(O_ALTFP, "serd override %s_t %s", 1200 serdclass, s); 1201 got_t_override = 1; 1202 } 1203 FREE(prop_name); 1204 } 1205 1206 if (serdn != -1 && got_n_override == 0) { 1207 nval = serdn; 1208 out(O_ALTFP, "serd override %s_n %d", serdclass, serdn); 1209 got_n_override = 1; 1210 } 1211 if (serdt != NULL && got_t_override == 0) { 1212 ptr = STRDUP(serdt); 1213 out(O_ALTFP, "serd override %s_t %s", serdclass, serdt); 1214 got_t_override = 1; 1215 } 1216 1217 if (!got_n_override) { 1218 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, 1219 NULL); 1220 ASSERT(nN->t == T_NUM); 1221 nval = (uint_t)nN->u.ull; 1222 } 1223 if (!got_t_override) { 1224 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, 1225 NULL); 1226 ASSERT(nT->t == T_TIMEVAL); 1227 tval = (hrtime_t)nT->u.ull; 1228 } else { 1229 const unsigned long long *ullp; 1230 const char *suffix; 1231 int len; 1232 1233 len = strspn(ptr, "0123456789"); 1234 suffix = stable(&ptr[len]); 1235 ullp = (unsigned long long *)lut_lookup(Timesuffixlut, 1236 (void *)suffix, NULL); 1237 ptr[len] = '\0'; 1238 tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll); 1239 FREE(ptr); 1240 } 1241 fmd_serd_create(hdl, serdname, nval, tval); 1242 } 1243 1244 newentp = MALLOC(sizeof (*newentp)); 1245 newentp->ename = stable(serdclass); 1246 FREE(serdclass); 1247 newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname); 1248 newentp->hdl = hdl; 1249 if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) { 1250 SerdEngines = lut_add(SerdEngines, (void *)newentp, 1251 (void *)newentp, (lut_cmp)serd_cmp); 1252 Serd_need_save = 1; 1253 serd_save(); 1254 } else { 1255 FREE(newentp); 1256 } 1257 1258 1259 /* 1260 * increment SERD engine. if engine fires, reset serd 1261 * engine and return trip_strcode if required. 
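	 * When the engine fires, the serd is attached to the case and reset,
	 * and if the caller passed ippp we hand back the ename and ipath of
	 * the engine's "trip" event and return 1.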
1262 */ 1263 for (i = 0; i < serdincrement; i++) { 1264 if (fmd_serd_record(hdl, serdname, ffep)) { 1265 fmd_case_add_serd(hdl, fmcase, serdname); 1266 fmd_serd_reset(hdl, serdname); 1267 1268 if (ippp) { 1269 struct node *tripinst = 1270 lut_lookup(serdinst->u.stmt.lutp, 1271 (void *)L_trip, NULL); 1272 ASSERT(tripinst != NULL); 1273 *enamep = tripinst->u.event.ename->u.name.s; 1274 *ippp = ipath(tripinst->u.event.epname); 1275 out(O_ALTFP|O_NONL, 1276 "[engine fired: %s, sending: ", serdname); 1277 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 1278 out(O_ALTFP, "]"); 1279 } else { 1280 out(O_ALTFP, "[engine fired: %s, no trip]", 1281 serdname); 1282 } 1283 FREE(serdname); 1284 return (1); 1285 } 1286 } 1287 1288 FREE(serdname); 1289 return (0); 1290 } 1291 1292 /* 1293 * search a suspect list for upsets. feed each upset to serd_eval() and 1294 * build up tripped[], an array of ereports produced by the firing of 1295 * any SERD engines. then feed each ereport back into 1296 * fme_receive_report(). 1297 * 1298 * returns ntrip, the number of these ereports produced. 1299 */ 1300 static int 1301 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 1302 { 1303 /* we build an array of tripped ereports that we send ourselves */ 1304 struct { 1305 const char *ename; 1306 const struct ipath *ipp; 1307 } *tripped; 1308 struct event *sp; 1309 int ntrip, nupset, i; 1310 1311 /* 1312 * count the number of upsets to determine the upper limit on 1313 * expected trip ereport strings. remember that one upset can 1314 * lead to at most one ereport. 1315 */ 1316 nupset = 0; 1317 for (sp = fmep->suspects; sp; sp = sp->suspects) { 1318 if (sp->t == N_UPSET) 1319 nupset++; 1320 } 1321 1322 if (nupset == 0) 1323 return (0); 1324 1325 /* 1326 * get to this point if we have upsets and expect some trip 1327 * ereports 1328 */ 1329 tripped = alloca(sizeof (*tripped) * nupset); 1330 bzero((void *)tripped, sizeof (*tripped) * nupset); 1331 1332 ntrip = 0; 1333 for (sp = fmep->suspects; sp; sp = sp->suspects) 1334 if (sp->t == N_UPSET && 1335 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 1336 &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1) 1337 ntrip++; 1338 1339 for (i = 0; i < ntrip; i++) { 1340 struct event *ep, *nep; 1341 struct fme *nfmep; 1342 fmd_case_t *fmcase; 1343 const struct ipath *ipp; 1344 const char *eventstring; 1345 int prev_verbose; 1346 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1347 enum fme_state state; 1348 1349 /* 1350 * First try and evaluate a case with the trip ereport plus 1351 * all the other ereports that cause the trip. If that fails 1352 * to evaluate then try again with just this ereport on its own. 
1353 */ 1354 out(O_ALTFP|O_NONL, "fme_receive_report_serd: "); 1355 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1356 out(O_ALTFP|O_STAMP, NULL); 1357 ep = fmep->e0; 1358 eventstring = ep->enode->u.event.ename->u.name.s; 1359 ipp = ep->ipp; 1360 1361 /* 1362 * create a duplicate fme and case 1363 */ 1364 fmcase = fmd_case_open(fmep->hdl, NULL); 1365 out(O_ALTFP|O_NONL, "duplicate fme for event ["); 1366 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1367 out(O_ALTFP, " ]"); 1368 1369 if ((nfmep = newfme(eventstring, ipp, fmep->hdl, 1370 fmcase, ffep, ep->nvp)) == NULL) { 1371 out(O_ALTFP|O_NONL, "["); 1372 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1373 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1374 continue; 1375 } 1376 1377 Open_fme_count++; 1378 nfmep->pull = fmep->pull; 1379 init_fme_bufs(nfmep); 1380 out(O_ALTFP|O_NONL, "["); 1381 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1382 out(O_ALTFP, " created FME%d, case %s]", nfmep->id, 1383 fmd_case_uuid(nfmep->hdl, nfmep->fmcase)); 1384 if (ffep) { 1385 fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep); 1386 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep); 1387 nfmep->e0r = ffep; 1388 } 1389 1390 /* 1391 * add the original ereports 1392 */ 1393 for (ep = fmep->observations; ep; ep = ep->observations) { 1394 eventstring = ep->enode->u.event.ename->u.name.s; 1395 ipp = ep->ipp; 1396 out(O_ALTFP|O_NONL, "adding event ["); 1397 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1398 out(O_ALTFP, " ]"); 1399 nep = itree_lookup(nfmep->eventtree, eventstring, ipp); 1400 if (nep->count++ == 0) { 1401 nep->observations = nfmep->observations; 1402 nfmep->observations = nep; 1403 serialize_observation(nfmep, eventstring, ipp); 1404 nep->nvp = evnv_dupnvl(ep->nvp); 1405 } 1406 if (ep->ffep && ep->ffep != ffep) 1407 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, 1408 ep->ffep); 1409 stats_counter_bump(nfmep->Rcount); 1410 } 1411 1412 /* 1413 * add the serd trigger ereport 1414 */ 1415 if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename, 1416 tripped[i].ipp)) == NULL) { 1417 /* 1418 * The trigger ereport is not in the instance tree. It 1419 * was presumably removed by prune_propagations() as 1420 * this combination of events is not present in the 1421 * rules. 1422 */ 1423 out(O_ALTFP, "upsets_eval: e0 not in instance tree"); 1424 Undiag_reason = UD_VAL_BADEVENTI; 1425 goto retry_lone_ereport; 1426 } 1427 out(O_ALTFP|O_NONL, "adding event ["); 1428 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp); 1429 out(O_ALTFP, " ]"); 1430 nfmep->ecurrent = ep; 1431 ep->nvp = NULL; 1432 ep->count = 1; 1433 ep->observations = nfmep->observations; 1434 nfmep->observations = ep; 1435 1436 /* 1437 * just peek first. 1438 */ 1439 nfmep->peek = 1; 1440 prev_verbose = Verbose; 1441 if (Debug == 0) 1442 Verbose = 0; 1443 lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep); 1444 state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay); 1445 nfmep->peek = 0; 1446 Verbose = prev_verbose; 1447 if (state == FME_DISPROVED) { 1448 out(O_ALTFP, "upsets_eval: hypothesis disproved"); 1449 Undiag_reason = UD_VAL_UNSOLVD; 1450 retry_lone_ereport: 1451 /* 1452 * However the trigger ereport on its own might be 1453 * diagnosable, so check for that. Undo the new fme 1454 * and case we just created and call fme_receive_report. 
			 */
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
			    tripped[i].ipp);
			out(O_ALTFP, " retrying with just trigger ereport]");
			itree_free(nfmep->eventtree);
			nfmep->eventtree = NULL;
			structconfig_free(nfmep->config);
			nfmep->config = NULL;
			destroy_fme_bufs(nfmep);
			fmd_case_close(nfmep->hdl, nfmep->fmcase);
			fme_receive_report(fmep->hdl, ffep,
			    tripped[i].ename, tripped[i].ipp, NULL);
			continue;
		}

		/*
		 * and evaluate
		 */
		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
		fme_eval(nfmep, ffep);
	}

	return (ntrip);
}

/*
 * fme_receive_external_report -- call when an external ereport comes in
 *
 * this routine just converts the relevant information from the ereport
 * into a format used internally and passes it on to fme_receive_report().
 */
void
fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
    const char *class)
{
	struct node *epnamenp;
	fmd_case_t *fmcase;
	const struct ipath *ipp;
	nvlist_t *detector = NULL;

	class = stable(class);

	/* Get the component path from the ereport */
	epnamenp = platform_getpath(nvl);

	/* See if we ended up without a path. */
	if (epnamenp == NULL) {
		/* See if class permits silent discard on unknown component. */
		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
			    "to component path, but silent discard allowed.",
			    class);
		} else {
			/*
			 * XFILE: Failure to find a component is bad unless
			 * 'discard_if_config_unknown=1' was specified in the
			 * ereport definition.  Indicate undiagnosable.
			 */
			Undiag_reason = UD_VAL_NOPATH;
			fmcase = fmd_case_open(hdl, NULL);

			/*
			 * We don't have a component path here (which means
			 * that the detector was not in hc-scheme and couldn't
			 * be converted to hc-scheme).  Report the raw detector
			 * as the suspect resource if there is one.
			 */
			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
			    &detector);
			publish_undiagnosable(hdl, ffep, fmcase, detector,
			    (char *)class);
		}
		return;
	}

	ipp = ipath(epnamenp);
	tree_free(epnamenp);
	fme_receive_report(hdl, ffep, class, ipp, nvl);
}

/*ARGSUSED*/
void
fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
    const char *eventstring)
{
	char *uuid;
	nvlist_t **nva;
	uint_t nvc;
	const struct ipath *ipp;

	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &nva, &nvc) != 0) {
		out(O_ALTFP, "No uuid or fault list for list.repaired event");
		return;
	}

	out(O_ALTFP, "Processing list.repaired from case %s", uuid);

	while (nvc-- != 0) {
		/*
		 * Reset any istat or serd engine associated with this path.
1558 */ 1559 char *path; 1560 1561 if ((ipp = platform_fault2ipath(*nva++)) == NULL) 1562 continue; 1563 1564 path = ipath2str(NULL, ipp); 1565 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s", 1566 path); 1567 FREE(path); 1568 1569 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp); 1570 istat_save(); 1571 1572 lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp); 1573 serd_save(); 1574 } 1575 } 1576 1577 /*ARGSUSED*/ 1578 void 1579 fme_receive_topology_change(void) 1580 { 1581 lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL); 1582 istat_save(); 1583 1584 lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL); 1585 serd_save(); 1586 } 1587 1588 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 1589 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 1590 1591 /* ARGSUSED */ 1592 static void 1593 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 1594 { 1595 struct bubble *bp; 1596 struct arrowlist *ap; 1597 1598 ep->cached_state = 0; 1599 ep->keep_in_tree = 0; 1600 for (bp = itree_next_bubble(ep, NULL); bp; 1601 bp = itree_next_bubble(ep, bp)) { 1602 if (bp->t != B_FROM) 1603 continue; 1604 bp->mark = 0; 1605 for (ap = itree_next_arrow(bp, NULL); ap; 1606 ap = itree_next_arrow(bp, ap)) 1607 ap->arrowp->mark = 0; 1608 } 1609 } 1610 1611 static void 1612 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 1613 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 1614 { 1615 struct event *ep; 1616 struct fme *fmep = NULL; 1617 struct fme *ofmep = NULL; 1618 struct fme *cfmep, *svfmep; 1619 int matched = 0; 1620 nvlist_t *defect; 1621 fmd_case_t *fmcase; 1622 char *reason; 1623 1624 out(O_ALTFP|O_NONL, "fme_receive_report: "); 1625 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1626 out(O_ALTFP|O_STAMP, NULL); 1627 1628 /* decide which FME it goes to */ 1629 for (fmep = FMElist; fmep; fmep = fmep->next) { 1630 int prev_verbose; 1631 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1632 enum fme_state state; 1633 nvlist_t *pre_peek_nvp = NULL; 1634 1635 if (fmep->overflow) { 1636 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1637 ofmep = fmep; 1638 1639 continue; 1640 } 1641 1642 /* 1643 * ignore solved or closed cases 1644 */ 1645 if (fmep->posted_suspects || 1646 fmd_case_solved(fmep->hdl, fmep->fmcase) || 1647 fmd_case_closed(fmep->hdl, fmep->fmcase)) 1648 continue; 1649 1650 /* look up event in event tree for this FME */ 1651 if ((ep = itree_lookup(fmep->eventtree, 1652 eventstring, ipp)) == NULL) 1653 continue; 1654 1655 /* note observation */ 1656 fmep->ecurrent = ep; 1657 if (ep->count++ == 0) { 1658 /* link it into list of observations seen */ 1659 ep->observations = fmep->observations; 1660 fmep->observations = ep; 1661 ep->nvp = evnv_dupnvl(nvl); 1662 } else { 1663 /* use new payload values for peek */ 1664 pre_peek_nvp = ep->nvp; 1665 ep->nvp = evnv_dupnvl(nvl); 1666 } 1667 1668 /* tell hypothesise() not to mess with suspect list */ 1669 fmep->peek = 1; 1670 1671 /* don't want this to be verbose (unless Debug is set) */ 1672 prev_verbose = Verbose; 1673 if (Debug == 0) 1674 Verbose = 0; 1675 1676 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1677 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1678 1679 fmep->peek = 0; 1680 1681 /* put verbose flag back */ 1682 Verbose = prev_verbose; 1683 1684 if (state != FME_DISPROVED) { 1685 /* found an FME that explains the ereport */ 1686 matched++; 1687 out(O_ALTFP|O_NONL, "["); 1688 
ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1689 out(O_ALTFP, " explained by FME%d]", fmep->id); 1690 1691 if (pre_peek_nvp) 1692 nvlist_free(pre_peek_nvp); 1693 1694 if (ep->count == 1) 1695 serialize_observation(fmep, eventstring, ipp); 1696 1697 if (ffep) { 1698 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1699 ep->ffep = ffep; 1700 } 1701 1702 stats_counter_bump(fmep->Rcount); 1703 1704 /* re-eval FME */ 1705 fme_eval(fmep, ffep); 1706 } else { 1707 1708 /* not a match, undo noting of observation */ 1709 fmep->ecurrent = NULL; 1710 if (--ep->count == 0) { 1711 /* unlink it from observations */ 1712 fmep->observations = ep->observations; 1713 ep->observations = NULL; 1714 nvlist_free(ep->nvp); 1715 ep->nvp = NULL; 1716 } else { 1717 nvlist_free(ep->nvp); 1718 ep->nvp = pre_peek_nvp; 1719 } 1720 } 1721 } 1722 1723 if (matched) 1724 return; /* explained by at least one existing FME */ 1725 1726 /* clean up closed fmes */ 1727 cfmep = ClosedFMEs; 1728 while (cfmep != NULL) { 1729 svfmep = cfmep->next; 1730 destroy_fme(cfmep); 1731 cfmep = svfmep; 1732 } 1733 ClosedFMEs = NULL; 1734 1735 if (ofmep) { 1736 out(O_ALTFP|O_NONL, "["); 1737 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1738 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1739 if (ffep) 1740 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1741 1742 return; 1743 1744 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1745 out(O_ALTFP|O_NONL, "["); 1746 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1747 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1748 1749 fmcase = fmd_case_open(hdl, NULL); 1750 1751 /* Create overflow fme */ 1752 if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, 1753 nvl)) == NULL) { 1754 out(O_ALTFP|O_NONL, "["); 1755 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1756 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1757 return; 1758 } 1759 1760 Open_fme_count++; 1761 1762 init_fme_bufs(fmep); 1763 fmep->overflow = B_TRUE; 1764 1765 if (ffep) 1766 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1767 1768 Undiag_reason = UD_VAL_MAXFME; 1769 defect = fmd_nvl_create_fault(hdl, 1770 undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL); 1771 reason = undiag_2reason_str(Undiag_reason, NULL); 1772 (void) nvlist_add_string(defect, UNDIAG_REASON, reason); 1773 FREE(reason); 1774 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1775 fmd_case_solve(hdl, fmep->fmcase); 1776 Undiag_reason = UD_VAL_UNKNOWN; 1777 return; 1778 } 1779 1780 /* open a case */ 1781 fmcase = fmd_case_open(hdl, NULL); 1782 1783 /* start a new FME */ 1784 if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) { 1785 out(O_ALTFP|O_NONL, "["); 1786 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1787 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1788 return; 1789 } 1790 1791 Open_fme_count++; 1792 1793 init_fme_bufs(fmep); 1794 1795 out(O_ALTFP|O_NONL, "["); 1796 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1797 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1798 fmd_case_uuid(hdl, fmep->fmcase)); 1799 1800 ep = fmep->e0; 1801 ASSERT(ep != NULL); 1802 1803 /* note observation */ 1804 fmep->ecurrent = ep; 1805 if (ep->count++ == 0) { 1806 /* link it into list of observations seen */ 1807 ep->observations = fmep->observations; 1808 fmep->observations = ep; 1809 ep->nvp = evnv_dupnvl(nvl); 1810 serialize_observation(fmep, eventstring, ipp); 1811 } else { 1812 /* new payload overrides any previous */ 1813 nvlist_free(ep->nvp); 1814 ep->nvp = evnv_dupnvl(nvl); 1815 } 1816 1817 stats_counter_bump(fmep->Rcount); 1818 1819 if (ffep) { 1820 
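		/*
		 * Remember the initial ereport: it becomes the case principal
		 * and e0r, which later timers are set relative to.
		 */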
fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1821 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1822 fmep->e0r = ffep; 1823 ep->ffep = ffep; 1824 } 1825 1826 /* give the diagnosis algorithm a shot at the new FME state */ 1827 fme_eval(fmep, ffep); 1828 } 1829 1830 void 1831 fme_status(int flags) 1832 { 1833 struct fme *fmep; 1834 1835 if (FMElist == NULL) { 1836 out(flags, "No fault management exercises underway."); 1837 return; 1838 } 1839 1840 for (fmep = FMElist; fmep; fmep = fmep->next) 1841 fme_print(flags, fmep); 1842 } 1843 1844 /* 1845 * "indent" routines used mostly for nicely formatted debug output, but also 1846 * for sanity checking for infinite recursion bugs. 1847 */ 1848 1849 #define MAX_INDENT 1024 1850 static const char *indent_s[MAX_INDENT]; 1851 static int current_indent; 1852 1853 static void 1854 indent_push(const char *s) 1855 { 1856 if (current_indent < MAX_INDENT) 1857 indent_s[current_indent++] = s; 1858 else 1859 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1860 } 1861 1862 static void 1863 indent_set(const char *s) 1864 { 1865 current_indent = 0; 1866 indent_push(s); 1867 } 1868 1869 static void 1870 indent_pop(void) 1871 { 1872 if (current_indent > 0) 1873 current_indent--; 1874 else 1875 out(O_DIE, "recursion underflow"); 1876 } 1877 1878 static void 1879 indent(void) 1880 { 1881 int i; 1882 if (!Verbose) 1883 return; 1884 for (i = 0; i < current_indent; i++) 1885 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1886 } 1887 1888 #define SLNEW 1 1889 #define SLCHANGED 2 1890 #define SLWAIT 3 1891 #define SLDISPROVED 4 1892 1893 static void 1894 print_suspects(int circumstance, struct fme *fmep) 1895 { 1896 struct event *ep; 1897 1898 out(O_ALTFP|O_NONL, "["); 1899 if (circumstance == SLCHANGED) { 1900 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, " 1901 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1902 } else if (circumstance == SLWAIT) { 1903 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id, 1904 fmep->timer); 1905 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1906 } else if (circumstance == SLDISPROVED) { 1907 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1908 } else { 1909 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1910 } 1911 1912 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1913 out(O_ALTFP, "]"); 1914 return; 1915 } 1916 1917 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1918 out(O_ALTFP|O_NONL, " "); 1919 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1920 } 1921 out(O_ALTFP, "]"); 1922 } 1923 1924 static struct node * 1925 eventprop_lookup(struct event *ep, const char *propname) 1926 { 1927 return (lut_lookup(ep->props, (void *)propname, NULL)); 1928 } 1929 1930 #define MAXDIGITIDX 23 1931 static char numbuf[MAXDIGITIDX + 1]; 1932 1933 static int 1934 node2uint(struct node *n, uint_t *valp) 1935 { 1936 struct evalue value; 1937 struct lut *globals = NULL; 1938 1939 if (n == NULL) 1940 return (1); 1941 1942 /* 1943 * check value.v since we are being asked to convert an unsigned 1944 * long long int to an unsigned int 1945 */ 1946 if (! 
eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1947 value.t != UINT64 || value.v > (1ULL << 32)) 1948 return (1); 1949 1950 *valp = (uint_t)value.v; 1951 1952 return (0); 1953 } 1954 1955 static nvlist_t * 1956 node2fmri(struct node *n) 1957 { 1958 nvlist_t **pa, *f, *p; 1959 struct node *nc; 1960 uint_t depth = 0; 1961 char *numstr, *nullbyte; 1962 char *failure; 1963 int err, i; 1964 1965 /* XXX do we need to be able to handle a non-T_NAME node? */ 1966 if (n == NULL || n->t != T_NAME) 1967 return (NULL); 1968 1969 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1970 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1971 break; 1972 depth++; 1973 } 1974 1975 if (nc != NULL) { 1976 /* We bailed early, something went wrong */ 1977 return (NULL); 1978 } 1979 1980 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1981 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1982 pa = alloca(depth * sizeof (nvlist_t *)); 1983 for (i = 0; i < depth; i++) 1984 pa[i] = NULL; 1985 1986 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1987 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1988 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1989 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1990 if (err != 0) { 1991 failure = "basic construction of FMRI failed"; 1992 goto boom; 1993 } 1994 1995 numbuf[MAXDIGITIDX] = '\0'; 1996 nullbyte = &numbuf[MAXDIGITIDX]; 1997 i = 0; 1998 1999 for (nc = n; nc != NULL; nc = nc->u.name.next) { 2000 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 2001 if (err != 0) { 2002 failure = "alloc of an hc-pair failed"; 2003 goto boom; 2004 } 2005 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 2006 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 2007 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 2008 if (err != 0) { 2009 failure = "construction of an hc-pair failed"; 2010 goto boom; 2011 } 2012 pa[i++] = p; 2013 } 2014 2015 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 2016 if (err == 0) { 2017 for (i = 0; i < depth; i++) 2018 if (pa[i] != NULL) 2019 nvlist_free(pa[i]); 2020 return (f); 2021 } 2022 failure = "addition of hc-pair array to FMRI failed"; 2023 2024 boom: 2025 for (i = 0; i < depth; i++) 2026 if (pa[i] != NULL) 2027 nvlist_free(pa[i]); 2028 nvlist_free(f); 2029 out(O_DIE, "%s", failure); 2030 /*NOTREACHED*/ 2031 return (NULL); 2032 } 2033 2034 /* an ipath cache entry is an array of these, with s==NULL at the end */ 2035 struct ipath { 2036 const char *s; /* component name (in stable) */ 2037 int i; /* instance number */ 2038 }; 2039 2040 static nvlist_t * 2041 ipath2fmri(struct ipath *ipath) 2042 { 2043 nvlist_t **pa, *f, *p; 2044 uint_t depth = 0; 2045 char *numstr, *nullbyte; 2046 char *failure; 2047 int err, i; 2048 struct ipath *ipp; 2049 2050 for (ipp = ipath; ipp->s != NULL; ipp++) 2051 depth++; 2052 2053 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 2054 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 2055 pa = alloca(depth * sizeof (nvlist_t *)); 2056 for (i = 0; i < depth; i++) 2057 pa[i] = NULL; 2058 2059 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 2060 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 2061 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 2062 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 2063 if (err != 0) { 2064 failure = "basic construction of FMRI failed"; 2065 goto boom; 2066 } 2067 2068 numbuf[MAXDIGITIDX] = '\0'; 2069 nullbyte = &numbuf[MAXDIGITIDX]; 2070 
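	/*
	 * Build one hc-pair nvlist per path component: FM_FMRI_HC_NAME gets
	 * the component name and FM_FMRI_HC_ID its instance number as a
	 * string, so (purely illustrative) a path like mb0/cpu1 becomes the
	 * pairs {mb, "0"} and {cpu, "1"}.
	 */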
i = 0; 2071 2072 for (ipp = ipath; ipp->s != NULL; ipp++) { 2073 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 2074 if (err != 0) { 2075 failure = "alloc of an hc-pair failed"; 2076 goto boom; 2077 } 2078 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s); 2079 numstr = ulltostr(ipp->i, nullbyte); 2080 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 2081 if (err != 0) { 2082 failure = "construction of an hc-pair failed"; 2083 goto boom; 2084 } 2085 pa[i++] = p; 2086 } 2087 2088 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 2089 if (err == 0) { 2090 for (i = 0; i < depth; i++) 2091 if (pa[i] != NULL) 2092 nvlist_free(pa[i]); 2093 return (f); 2094 } 2095 failure = "addition of hc-pair array to FMRI failed"; 2096 2097 boom: 2098 for (i = 0; i < depth; i++) 2099 if (pa[i] != NULL) 2100 nvlist_free(pa[i]); 2101 nvlist_free(f); 2102 out(O_DIE, "%s", failure); 2103 /*NOTREACHED*/ 2104 return (NULL); 2105 } 2106 2107 static uint8_t 2108 percentof(uint_t part, uint_t whole) 2109 { 2110 unsigned long long p = part * 1000; 2111 2112 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 2113 } 2114 2115 struct rsl { 2116 struct event *suspect; 2117 nvlist_t *asru; 2118 nvlist_t *fru; 2119 nvlist_t *rsrc; 2120 }; 2121 2122 static void publish_suspects(struct fme *fmep, struct rsl *srl); 2123 2124 /* 2125 * rslfree -- free internal members of struct rsl not expected to be 2126 * freed elsewhere. 2127 */ 2128 static void 2129 rslfree(struct rsl *freeme) 2130 { 2131 if (freeme->asru != NULL) 2132 nvlist_free(freeme->asru); 2133 if (freeme->fru != NULL) 2134 nvlist_free(freeme->fru); 2135 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 2136 nvlist_free(freeme->rsrc); 2137 } 2138 2139 /* 2140 * rslcmp -- compare two rsl structures. Use the following 2141 * comparisons to establish cardinality: 2142 * 2143 * 1. Name of the suspect's class. (simple strcmp) 2144 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 2145 * 2146 */ 2147 static int 2148 rslcmp(const void *a, const void *b) 2149 { 2150 struct rsl *r1 = (struct rsl *)a; 2151 struct rsl *r2 = (struct rsl *)b; 2152 int rv; 2153 2154 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 2155 r2->suspect->enode->u.event.ename->u.name.s); 2156 if (rv != 0) 2157 return (rv); 2158 2159 if (r1->rsrc == NULL && r2->rsrc == NULL) 2160 return (0); 2161 if (r1->rsrc == NULL) 2162 return (-1); 2163 if (r2->rsrc == NULL) 2164 return (1); 2165 return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0)); 2166 } 2167 2168 /* 2169 * get_resources -- for a given suspect, determine what ASRU, FRU and 2170 * RSRC nvlists should be advertised in the final suspect list. 2171 */ 2172 void 2173 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 2174 { 2175 struct node *asrudef, *frudef; 2176 nvlist_t *asru, *fru; 2177 nvlist_t *rsrc = NULL; 2178 char *pathstr; 2179 2180 /* 2181 * First find any ASRU and/or FRU defined in the 2182 * initial fault tree. 
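	 * Either definition may be absent, in which case node2fmri() is
	 * handed a NULL node and returns NULL, leaving that FMRI unset.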
2183 */ 2184 asrudef = eventprop_lookup(sp, L_ASRU); 2185 frudef = eventprop_lookup(sp, L_FRU); 2186 2187 /* 2188 * Create FMRIs based on those definitions 2189 */ 2190 asru = node2fmri(asrudef); 2191 fru = node2fmri(frudef); 2192 pathstr = ipath2str(NULL, sp->ipp); 2193 2194 /* 2195 * Allow for platform translations of the FMRIs 2196 */ 2197 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 2198 pathstr); 2199 2200 FREE(pathstr); 2201 rsrcs->suspect = sp; 2202 rsrcs->asru = asru; 2203 rsrcs->fru = fru; 2204 rsrcs->rsrc = rsrc; 2205 } 2206 2207 /* 2208 * trim_suspects -- prior to publishing, we may need to remove some 2209 * suspects from the list. If we're auto-closing upsets, we don't 2210 * want any of those in the published list. If the ASRUs for multiple 2211 * defects resolve to the same ASRU (driver) we only want to publish 2212 * that as a single suspect. 2213 */ 2214 static int 2215 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2, 2216 fmd_event_t *ffep) 2217 { 2218 struct event *ep; 2219 struct rsl *rp = begin; 2220 struct rsl *rp2 = begin2; 2221 int mess_zero_count = 0; 2222 int serd_rval; 2223 uint_t messval; 2224 2225 /* remove any unwanted upsets and populate our array */ 2226 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 2227 if (is_upset(ep->t)) 2228 continue; 2229 serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep, 2230 NULL, NULL); 2231 if (serd_rval == 0) 2232 continue; 2233 if (node2uint(eventprop_lookup(ep, L_message), 2234 &messval) == 0 && messval == 0) { 2235 get_resources(ep, rp2, fmep->config); 2236 rp2++; 2237 mess_zero_count++; 2238 } else { 2239 get_resources(ep, rp, fmep->config); 2240 rp++; 2241 fmep->nsuspects++; 2242 } 2243 } 2244 return (mess_zero_count); 2245 } 2246 2247 /* 2248 * addpayloadprop -- add a payload prop to a problem 2249 */ 2250 static void 2251 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 2252 { 2253 nvlist_t *rsrc, *hcs; 2254 2255 ASSERT(fault != NULL); 2256 ASSERT(lhs != NULL); 2257 ASSERT(rhs != NULL); 2258 2259 if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0) 2260 out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs); 2261 2262 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) { 2263 out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific"); 2264 if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0) 2265 out(O_DIE, 2266 "cannot add payloadprop \"%s\" to fault", lhs); 2267 if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0) 2268 out(O_DIE, 2269 "cannot add payloadprop \"%s\" to fault", lhs); 2270 nvlist_free(hcs); 2271 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) 2272 out(O_DIE, 2273 "cannot add payloadprop \"%s\" to fault", lhs); 2274 } else 2275 out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific"); 2276 2277 if (rhs->t == UINT64) { 2278 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 2279 2280 if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0) 2281 out(O_DIE, 2282 "cannot add payloadprop \"%s\" to fault", lhs); 2283 } else { 2284 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 2285 lhs, (char *)(uintptr_t)rhs->v); 2286 2287 if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0) 2288 out(O_DIE, 2289 "cannot add payloadprop \"%s\" to fault", lhs); 2290 } 2291 } 2292 2293 static char *Istatbuf; 2294 static char *Istatbufptr; 2295 static int Istatsz; 2296 2297 /* 2298 * istataddsize -- calculate size of istat and add it to Istatsz 2299 */ 2300 /*ARGSUSED2*/ 2301 
static void 2302 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2303 { 2304 int val; 2305 2306 ASSERT(lhs != NULL); 2307 ASSERT(rhs != NULL); 2308 2309 if ((val = stats_counter_value(rhs)) == 0) 2310 return; /* skip zero-valued stats */ 2311 2312 /* count up the size of the stat name */ 2313 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 2314 Istatsz++; /* for the trailing NULL byte */ 2315 2316 /* count up the size of the stat value */ 2317 Istatsz += snprintf(NULL, 0, "%d", val); 2318 Istatsz++; /* for the trailing NULL byte */ 2319 } 2320 2321 /* 2322 * istat2str -- serialize an istat, writing result to *Istatbufptr 2323 */ 2324 /*ARGSUSED2*/ 2325 static void 2326 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2327 { 2328 char *str; 2329 int len; 2330 int val; 2331 2332 ASSERT(lhs != NULL); 2333 ASSERT(rhs != NULL); 2334 2335 if ((val = stats_counter_value(rhs)) == 0) 2336 return; /* skip zero-valued stats */ 2337 2338 /* serialize the stat name */ 2339 str = ipath2str(lhs->ename, lhs->ipath); 2340 len = strlen(str); 2341 2342 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 2343 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 2344 Istatbufptr += len; 2345 FREE(str); 2346 *Istatbufptr++ = '\0'; 2347 2348 /* serialize the stat value */ 2349 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 2350 "%d", val); 2351 *Istatbufptr++ = '\0'; 2352 2353 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]); 2354 } 2355 2356 void 2357 istat_save() 2358 { 2359 if (Istat_need_save == 0) 2360 return; 2361 2362 /* figure out how big the serialzed info is */ 2363 Istatsz = 0; 2364 lut_walk(Istats, (lut_cb)istataddsize, NULL); 2365 2366 if (Istatsz == 0) { 2367 /* no stats to save */ 2368 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2369 return; 2370 } 2371 2372 /* create the serialized buffer */ 2373 Istatbufptr = Istatbuf = MALLOC(Istatsz); 2374 lut_walk(Istats, (lut_cb)istat2str, NULL); 2375 2376 /* clear out current saved stats */ 2377 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2378 2379 /* write out the new version */ 2380 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 2381 FREE(Istatbuf); 2382 2383 Istat_need_save = 0; 2384 } 2385 2386 int 2387 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2) 2388 { 2389 if (ent1->ename != ent2->ename) 2390 return (ent2->ename - ent1->ename); 2391 if (ent1->ipath != ent2->ipath) 2392 return ((char *)ent2->ipath - (char *)ent1->ipath); 2393 2394 return (0); 2395 } 2396 2397 /* 2398 * istat-verify -- verify the component associated with a stat still exists 2399 * 2400 * if the component no longer exists, this routine resets the stat and 2401 * returns 0. if the component still exists, it returns 1. 2402 */ 2403 static int 2404 istat_verify(struct node *snp, struct istat_entry *entp) 2405 { 2406 struct stats *statp; 2407 nvlist_t *fmri; 2408 2409 fmri = node2fmri(snp->u.event.epname); 2410 if (platform_path_exists(fmri)) { 2411 nvlist_free(fmri); 2412 return (1); 2413 } 2414 nvlist_free(fmri); 2415 2416 /* component no longer in system. 
zero out the associated stats */ 2417 if ((statp = (struct stats *) 2418 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL || 2419 stats_counter_value(statp) == 0) 2420 return (0); /* stat is already reset */ 2421 2422 Istat_need_save = 1; 2423 stats_counter_reset(statp); 2424 return (0); 2425 } 2426 2427 static void 2428 istat_bump(struct node *snp, int n) 2429 { 2430 struct stats *statp; 2431 struct istat_entry ent; 2432 2433 ASSERT(snp != NULL); 2434 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t)); 2435 ASSERT(snp->u.event.epname != NULL); 2436 2437 /* class name should be hoisted into a single stable entry */ 2438 ASSERT(snp->u.event.ename->u.name.next == NULL); 2439 ent.ename = snp->u.event.ename->u.name.s; 2440 ent.ipath = ipath(snp->u.event.epname); 2441 2442 if (!istat_verify(snp, &ent)) { 2443 /* component no longer exists in system, nothing to do */ 2444 return; 2445 } 2446 2447 if ((statp = (struct stats *) 2448 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) { 2449 /* need to create the counter */ 2450 int cnt = 0; 2451 struct node *np; 2452 char *sname; 2453 char *snamep; 2454 struct istat_entry *newentp; 2455 2456 /* count up the size of the stat name */ 2457 np = snp->u.event.ename; 2458 while (np != NULL) { 2459 cnt += strlen(np->u.name.s); 2460 cnt++; /* for the '.' or '@' */ 2461 np = np->u.name.next; 2462 } 2463 np = snp->u.event.epname; 2464 while (np != NULL) { 2465 cnt += snprintf(NULL, 0, "%s%llu", 2466 np->u.name.s, np->u.name.child->u.ull); 2467 cnt++; /* for the '/' or trailing NULL byte */ 2468 np = np->u.name.next; 2469 } 2470 2471 /* build the stat name */ 2472 snamep = sname = alloca(cnt); 2473 np = snp->u.event.ename; 2474 while (np != NULL) { 2475 snamep += snprintf(snamep, &sname[cnt] - snamep, 2476 "%s", np->u.name.s); 2477 np = np->u.name.next; 2478 if (np) 2479 *snamep++ = '.'; 2480 } 2481 *snamep++ = '@'; 2482 np = snp->u.event.epname; 2483 while (np != NULL) { 2484 snamep += snprintf(snamep, &sname[cnt] - snamep, 2485 "%s%llu", np->u.name.s, np->u.name.child->u.ull); 2486 np = np->u.name.next; 2487 if (np) 2488 *snamep++ = '/'; 2489 } 2490 *snamep++ = '\0'; 2491 2492 /* create the new stat & add it to our list */ 2493 newentp = MALLOC(sizeof (*newentp)); 2494 *newentp = ent; 2495 statp = stats_new_counter(NULL, sname, 0); 2496 Istats = lut_add(Istats, (void *)newentp, (void *)statp, 2497 (lut_cmp)istat_cmp); 2498 } 2499 2500 /* if n is non-zero, set that value instead of bumping */ 2501 if (n) { 2502 stats_counter_reset(statp); 2503 stats_counter_add(statp, n); 2504 } else 2505 stats_counter_bump(statp); 2506 Istat_need_save = 1; 2507 2508 ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath); 2509 out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented", 2510 stats_counter_value(statp)); 2511 } 2512 2513 /*ARGSUSED*/ 2514 static void 2515 istat_destructor(void *left, void *right, void *arg) 2516 { 2517 struct istat_entry *entp = (struct istat_entry *)left; 2518 struct stats *statp = (struct stats *)right; 2519 FREE(entp); 2520 stats_delete(statp); 2521 } 2522 2523 /* 2524 * Callback used in a walk of the Istats to reset matching stat counters. 
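 * An entry matches when its ipath pointer is identical to the one passed
 * in; this is the same pointer-identity comparison istat_cmp() uses.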
2525 */ 2526 static void 2527 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp, 2528 const struct ipath *ipp) 2529 { 2530 char *path; 2531 2532 if (entp->ipath == ipp) { 2533 path = ipath2str(entp->ename, ipp); 2534 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path); 2535 FREE(path); 2536 stats_counter_reset(statp); 2537 Istat_need_save = 1; 2538 } 2539 } 2540 2541 /*ARGSUSED*/ 2542 static void 2543 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp, 2544 void *unused) 2545 { 2546 char *path; 2547 nvlist_t *fmri; 2548 2549 fmri = ipath2fmri((struct ipath *)(entp->ipath)); 2550 if (!platform_path_exists(fmri)) { 2551 path = ipath2str(entp->ename, entp->ipath); 2552 out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path); 2553 FREE(path); 2554 stats_counter_reset(statp); 2555 Istat_need_save = 1; 2556 } 2557 nvlist_free(fmri); 2558 } 2559 2560 void 2561 istat_fini(void) 2562 { 2563 lut_free(Istats, istat_destructor, NULL); 2564 } 2565 2566 static char *Serdbuf; 2567 static char *Serdbufptr; 2568 static int Serdsz; 2569 2570 /* 2571 * serdaddsize -- calculate size of serd and add it to Serdsz 2572 */ 2573 /*ARGSUSED*/ 2574 static void 2575 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg) 2576 { 2577 ASSERT(lhs != NULL); 2578 2579 /* count up the size of the stat name */ 2580 Serdsz += ipath2strlen(lhs->ename, lhs->ipath); 2581 Serdsz++; /* for the trailing NULL byte */ 2582 } 2583 2584 /* 2585 * serd2str -- serialize a serd engine, writing result to *Serdbufptr 2586 */ 2587 /*ARGSUSED*/ 2588 static void 2589 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg) 2590 { 2591 char *str; 2592 int len; 2593 2594 ASSERT(lhs != NULL); 2595 2596 /* serialize the serd engine name */ 2597 str = ipath2str(lhs->ename, lhs->ipath); 2598 len = strlen(str); 2599 2600 ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]); 2601 (void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr); 2602 Serdbufptr += len; 2603 FREE(str); 2604 *Serdbufptr++ = '\0'; 2605 ASSERT(Serdbufptr <= &Serdbuf[Serdsz]); 2606 } 2607 2608 void 2609 serd_save() 2610 { 2611 if (Serd_need_save == 0) 2612 return; 2613 2614 /* figure out how big the serialzed info is */ 2615 Serdsz = 0; 2616 lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL); 2617 2618 if (Serdsz == 0) { 2619 /* no serd engines to save */ 2620 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS); 2621 return; 2622 } 2623 2624 /* create the serialized buffer */ 2625 Serdbufptr = Serdbuf = MALLOC(Serdsz); 2626 lut_walk(SerdEngines, (lut_cb)serd2str, NULL); 2627 2628 /* clear out current saved stats */ 2629 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS); 2630 2631 /* write out the new version */ 2632 fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz); 2633 FREE(Serdbuf); 2634 Serd_need_save = 0; 2635 } 2636 2637 int 2638 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2) 2639 { 2640 if (ent1->ename != ent2->ename) 2641 return (ent2->ename - ent1->ename); 2642 if (ent1->ipath != ent2->ipath) 2643 return ((char *)ent2->ipath - (char *)ent1->ipath); 2644 2645 return (0); 2646 } 2647 2648 void 2649 fme_serd_load(fmd_hdl_t *hdl) 2650 { 2651 int sz; 2652 char *sbuf; 2653 char *sepptr; 2654 char *ptr; 2655 struct serd_entry *newentp; 2656 struct node *epname; 2657 nvlist_t *fmri; 2658 char *namestring; 2659 2660 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0) 2661 return; 2662 sbuf = alloca(sz); 2663 fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz); 2664 ptr = sbuf; 2665 while (ptr < &sbuf[sz]) { 
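		/*
		 * Each record written by serd2str() is a NUL-terminated
		 * string of the form "ename@path" (illustratively,
		 * "serd.io.dev@mb0/pci1"); split it at the '@' to recover
		 * the engine name and the component path separately.
		 */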
2666 sepptr = strchr(ptr, '@'); 2667 *sepptr = '\0'; 2668 namestring = ptr; 2669 sepptr++; 2670 ptr = sepptr; 2671 ptr += strlen(ptr); 2672 ptr++; /* move past the '\0' separating paths */ 2673 epname = pathstring2epnamenp(sepptr); 2674 fmri = node2fmri(epname); 2675 if (platform_path_exists(fmri)) { 2676 newentp = MALLOC(sizeof (*newentp)); 2677 newentp->hdl = hdl; 2678 newentp->ipath = ipath(epname); 2679 newentp->ename = stable(namestring); 2680 SerdEngines = lut_add(SerdEngines, (void *)newentp, 2681 (void *)newentp, (lut_cmp)serd_cmp); 2682 } else 2683 Serd_need_save = 1; 2684 tree_free(epname); 2685 nvlist_free(fmri); 2686 } 2687 /* save it back again in case some of the paths no longer exist */ 2688 serd_save(); 2689 } 2690 2691 /*ARGSUSED*/ 2692 static void 2693 serd_destructor(void *left, void *right, void *arg) 2694 { 2695 struct serd_entry *entp = (struct serd_entry *)left; 2696 FREE(entp); 2697 } 2698 2699 /* 2700 * Callback used in a walk of the SerdEngines to reset matching serd engines. 2701 */ 2702 /*ARGSUSED*/ 2703 static void 2704 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp) 2705 { 2706 char *path; 2707 2708 if (entp->ipath == ipp) { 2709 path = ipath2str(entp->ename, ipp); 2710 out(O_ALTFP, "serd_reset_cb: resetting %s", path); 2711 fmd_serd_reset(entp->hdl, path); 2712 FREE(path); 2713 Serd_need_save = 1; 2714 } 2715 } 2716 2717 /*ARGSUSED*/ 2718 static void 2719 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2) 2720 { 2721 char *path; 2722 nvlist_t *fmri; 2723 2724 fmri = ipath2fmri((struct ipath *)(entp->ipath)); 2725 if (!platform_path_exists(fmri)) { 2726 path = ipath2str(entp->ename, entp->ipath); 2727 out(O_ALTFP, "serd_topo_chg_cb: not present %s", path); 2728 fmd_serd_reset(entp->hdl, path); 2729 FREE(path); 2730 Serd_need_save = 1; 2731 } 2732 nvlist_free(fmri); 2733 } 2734 2735 void 2736 serd_fini(void) 2737 { 2738 lut_free(SerdEngines, serd_destructor, NULL); 2739 } 2740 2741 static void 2742 publish_suspects(struct fme *fmep, struct rsl *srl) 2743 { 2744 struct rsl *rp; 2745 nvlist_t *fault; 2746 uint8_t cert; 2747 uint_t *frs; 2748 uint_t frsum, fr; 2749 uint_t messval; 2750 uint_t retireval; 2751 uint_t responseval; 2752 struct node *snp; 2753 int frcnt, fridx; 2754 boolean_t allfaulty = B_TRUE; 2755 struct rsl *erl = srl + fmep->nsuspects - 1; 2756 2757 /* 2758 * sort the array 2759 */ 2760 qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp); 2761 2762 /* sum the fitrates */ 2763 frs = alloca(fmep->nsuspects * sizeof (uint_t)); 2764 fridx = frcnt = frsum = 0; 2765 2766 for (rp = srl; rp <= erl; rp++) { 2767 struct node *n; 2768 2769 n = eventprop_lookup(rp->suspect, L_FITrate); 2770 if (node2uint(n, &fr) != 0) { 2771 out(O_DEBUG|O_NONL, "event "); 2772 ipath_print(O_DEBUG|O_NONL, 2773 rp->suspect->enode->u.event.ename->u.name.s, 2774 rp->suspect->ipp); 2775 out(O_DEBUG, " has no FITrate (using 1)"); 2776 fr = 1; 2777 } else if (fr == 0) { 2778 out(O_DEBUG|O_NONL, "event "); 2779 ipath_print(O_DEBUG|O_NONL, 2780 rp->suspect->enode->u.event.ename->u.name.s, 2781 rp->suspect->ipp); 2782 out(O_DEBUG, " has zero FITrate (using 1)"); 2783 fr = 1; 2784 } 2785 2786 frs[fridx++] = fr; 2787 frsum += fr; 2788 frcnt++; 2789 } 2790 2791 /* Add them in reverse order of our sort, as fmd reverses order */ 2792 for (rp = erl; rp >= srl; rp--) { 2793 cert = percentof(frs[--fridx], frsum); 2794 fault = fmd_nvl_create_fault(fmep->hdl, 2795 rp->suspect->enode->u.event.ename->u.name.s, 2796 cert, 2797 rp->asru, 2798 
rp->fru, 2799 rp->rsrc); 2800 if (fault == NULL) 2801 out(O_DIE, "fault creation failed"); 2802 /* if "message" property exists, add it to the fault */ 2803 if (node2uint(eventprop_lookup(rp->suspect, L_message), 2804 &messval) == 0) { 2805 2806 out(O_ALTFP, 2807 "[FME%d, %s adds message=%d to suspect list]", 2808 fmep->id, 2809 rp->suspect->enode->u.event.ename->u.name.s, 2810 messval); 2811 if (nvlist_add_boolean_value(fault, 2812 FM_SUSPECT_MESSAGE, 2813 (messval) ? B_TRUE : B_FALSE) != 0) { 2814 out(O_DIE, "cannot add no-message to fault"); 2815 } 2816 } 2817 2818 /* if "retire" property exists, add it to the fault */ 2819 if (node2uint(eventprop_lookup(rp->suspect, L_retire), 2820 &retireval) == 0) { 2821 2822 out(O_ALTFP, 2823 "[FME%d, %s adds retire=%d to suspect list]", 2824 fmep->id, 2825 rp->suspect->enode->u.event.ename->u.name.s, 2826 retireval); 2827 if (nvlist_add_boolean_value(fault, 2828 FM_SUSPECT_RETIRE, 2829 (retireval) ? B_TRUE : B_FALSE) != 0) { 2830 out(O_DIE, "cannot add no-retire to fault"); 2831 } 2832 } 2833 2834 /* if "response" property exists, add it to the fault */ 2835 if (node2uint(eventprop_lookup(rp->suspect, L_response), 2836 &responseval) == 0) { 2837 2838 out(O_ALTFP, 2839 "[FME%d, %s adds response=%d to suspect list]", 2840 fmep->id, 2841 rp->suspect->enode->u.event.ename->u.name.s, 2842 responseval); 2843 if (nvlist_add_boolean_value(fault, 2844 FM_SUSPECT_RESPONSE, 2845 (responseval) ? B_TRUE : B_FALSE) != 0) { 2846 out(O_DIE, "cannot add no-response to fault"); 2847 } 2848 } 2849 2850 /* add any payload properties */ 2851 lut_walk(rp->suspect->payloadprops, 2852 (lut_cb)addpayloadprop, (void *)fault); 2853 rslfree(rp); 2854 2855 /* 2856 * If "action" property exists, evaluate it; this must be done 2857 * before the allfaulty check below since some actions may 2858 * modify the asru to be used in fmd_nvl_fmri_has_fault. This 2859 * needs to be restructured if any new actions are introduced 2860 * that have effects that we do not want to be visible if 2861 * we decide not to publish in the dupclose check below. 2862 */ 2863 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) { 2864 struct evalue evalue; 2865 2866 out(O_ALTFP|O_NONL, 2867 "[FME%d, %s action ", fmep->id, 2868 rp->suspect->enode->u.event.ename->u.name.s); 2869 ptree_name_iter(O_ALTFP|O_NONL, snp); 2870 out(O_ALTFP, "]"); 2871 Action_nvl = fault; 2872 (void) eval_expr(snp, NULL, NULL, NULL, NULL, 2873 NULL, 0, &evalue); 2874 } 2875 2876 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault); 2877 2878 /* 2879 * check if the asru is already marked as "faulty". 
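	 * allfaulty starts out B_TRUE and is cleared as soon as one suspect
	 * has no ASRU or has an ASRU that is not already faulted; only when
	 * every suspect is already faulty do we skip the count-stat updates
	 * further below.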
2880 */ 2881 if (allfaulty) { 2882 nvlist_t *asru; 2883 2884 out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id); 2885 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect); 2886 out(O_ALTFP|O_VERB|O_NONL, " "); 2887 if (nvlist_lookup_nvlist(fault, 2888 FM_FAULT_ASRU, &asru) != 0) { 2889 out(O_ALTFP|O_VERB, "NULL asru"); 2890 allfaulty = B_FALSE; 2891 } else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru, 2892 FMD_HAS_FAULT_ASRU, NULL)) { 2893 out(O_ALTFP|O_VERB, "faulty"); 2894 } else { 2895 out(O_ALTFP|O_VERB, "not faulty"); 2896 allfaulty = B_FALSE; 2897 } 2898 } 2899 2900 } 2901 2902 if (!allfaulty) { 2903 /* 2904 * don't update the count stat if all asrus are already 2905 * present and unrepaired in the asru cache 2906 */ 2907 for (rp = erl; rp >= srl; rp--) { 2908 struct event *suspect = rp->suspect; 2909 2910 if (suspect == NULL) 2911 continue; 2912 2913 /* if "count" exists, increment the appropriate stat */ 2914 if ((snp = eventprop_lookup(suspect, 2915 L_count)) != NULL) { 2916 out(O_ALTFP|O_NONL, 2917 "[FME%d, %s count ", fmep->id, 2918 suspect->enode->u.event.ename->u.name.s); 2919 ptree_name_iter(O_ALTFP|O_NONL, snp); 2920 out(O_ALTFP, "]"); 2921 istat_bump(snp, 0); 2922 2923 } 2924 } 2925 istat_save(); /* write out any istat changes */ 2926 } 2927 } 2928 2929 static const char * 2930 undiag_2defect_str(int ud) 2931 { 2932 switch (ud) { 2933 case UD_VAL_MISSINGINFO: 2934 case UD_VAL_MISSINGOBS: 2935 case UD_VAL_MISSINGPATH: 2936 case UD_VAL_MISSINGZERO: 2937 case UD_VAL_BADOBS: 2938 case UD_VAL_CFGMISMATCH: 2939 return (UNDIAG_DEFECT_CHKPT); 2940 break; 2941 2942 case UD_VAL_BADEVENTI: 2943 case UD_VAL_BADEVENTPATH: 2944 case UD_VAL_BADEVENTCLASS: 2945 case UD_VAL_INSTFAIL: 2946 case UD_VAL_NOPATH: 2947 case UD_VAL_UNSOLVD: 2948 return (UNDIAG_DEFECT_FME); 2949 break; 2950 2951 case UD_VAL_MAXFME: 2952 return (UNDIAG_DEFECT_LIMIT); 2953 break; 2954 2955 case UD_VAL_UNKNOWN: 2956 default: 2957 return (UNDIAG_DEFECT_UNKNOWN); 2958 break; 2959 } 2960 } 2961 2962 static const char * 2963 undiag_2fault_str(int ud) 2964 { 2965 switch (ud) { 2966 case UD_VAL_BADEVENTI: 2967 case UD_VAL_BADEVENTPATH: 2968 case UD_VAL_BADEVENTCLASS: 2969 case UD_VAL_INSTFAIL: 2970 case UD_VAL_NOPATH: 2971 case UD_VAL_UNSOLVD: 2972 return (UNDIAG_FAULT_FME); 2973 default: 2974 return (NULL); 2975 } 2976 } 2977 2978 static char * 2979 undiag_2reason_str(int ud, char *arg) 2980 { 2981 const char *ptr; 2982 char *buf; 2983 int with_arg = 0; 2984 2985 switch (ud) { 2986 case UD_VAL_BADEVENTPATH: 2987 ptr = UD_STR_BADEVENTPATH; 2988 with_arg = 1; 2989 break; 2990 case UD_VAL_BADEVENTCLASS: 2991 ptr = UD_STR_BADEVENTCLASS; 2992 with_arg = 1; 2993 break; 2994 case UD_VAL_BADEVENTI: 2995 ptr = UD_STR_BADEVENTI; 2996 with_arg = 1; 2997 break; 2998 case UD_VAL_BADOBS: 2999 ptr = UD_STR_BADOBS; 3000 break; 3001 case UD_VAL_CFGMISMATCH: 3002 ptr = UD_STR_CFGMISMATCH; 3003 break; 3004 case UD_VAL_INSTFAIL: 3005 ptr = UD_STR_INSTFAIL; 3006 with_arg = 1; 3007 break; 3008 case UD_VAL_MAXFME: 3009 ptr = UD_STR_MAXFME; 3010 break; 3011 case UD_VAL_MISSINGINFO: 3012 ptr = UD_STR_MISSINGINFO; 3013 break; 3014 case UD_VAL_MISSINGOBS: 3015 ptr = UD_STR_MISSINGOBS; 3016 break; 3017 case UD_VAL_MISSINGPATH: 3018 ptr = UD_STR_MISSINGPATH; 3019 break; 3020 case UD_VAL_MISSINGZERO: 3021 ptr = UD_STR_MISSINGZERO; 3022 break; 3023 case UD_VAL_NOPATH: 3024 ptr = UD_STR_NOPATH; 3025 with_arg = 1; 3026 break; 3027 case UD_VAL_UNSOLVD: 3028 ptr = UD_STR_UNSOLVD; 3029 break; 3030 case UD_VAL_UNKNOWN: 3031 default: 3032 ptr = UD_STR_UNKNOWN; 
3033 break; 3034 } 3035 if (with_arg) { 3036 buf = MALLOC(strlen(ptr) + strlen(arg) - 1); 3037 (void) sprintf(buf, ptr, arg); 3038 } else { 3039 buf = MALLOC(strlen(ptr) + 1); 3040 (void) sprintf(buf, ptr); 3041 } 3042 return (buf); 3043 } 3044 3045 static void 3046 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase, 3047 nvlist_t *detector, char *arg) 3048 { 3049 struct case_list *newcase; 3050 nvlist_t *defect, *fault; 3051 const char *faultstr; 3052 char *reason = undiag_2reason_str(Undiag_reason, arg); 3053 3054 out(O_ALTFP, 3055 "[undiagnosable ereport received, " 3056 "creating and closing a new case (%s)]", reason); 3057 3058 newcase = MALLOC(sizeof (struct case_list)); 3059 newcase->next = NULL; 3060 newcase->fmcase = fmcase; 3061 if (Undiagablecaselist != NULL) 3062 newcase->next = Undiagablecaselist; 3063 Undiagablecaselist = newcase; 3064 3065 if (ffep != NULL) 3066 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 3067 3068 /* add defect */ 3069 defect = fmd_nvl_create_fault(hdl, 3070 undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector); 3071 (void) nvlist_add_string(defect, UNDIAG_REASON, reason); 3072 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE); 3073 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE); 3074 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 3075 3076 /* add fault if appropriate */ 3077 faultstr = undiag_2fault_str(Undiag_reason); 3078 if (faultstr != NULL) { 3079 fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL, 3080 detector); 3081 (void) nvlist_add_string(fault, UNDIAG_REASON, reason); 3082 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE, 3083 B_FALSE); 3084 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE, 3085 B_FALSE); 3086 fmd_case_add_suspect(hdl, newcase->fmcase, fault); 3087 } 3088 FREE(reason); 3089 3090 /* solve and close case */ 3091 fmd_case_solve(hdl, newcase->fmcase); 3092 fmd_case_close(hdl, newcase->fmcase); 3093 Undiag_reason = UD_VAL_UNKNOWN; 3094 } 3095 3096 static void 3097 fme_undiagnosable(struct fme *f) 3098 { 3099 nvlist_t *defect, *fault, *detector = NULL; 3100 struct event *ep; 3101 char *pathstr; 3102 const char *faultstr; 3103 char *reason = undiag_2reason_str(Undiag_reason, NULL); 3104 3105 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 3106 f->id, fmd_case_uuid(f->hdl, f->fmcase), reason); 3107 3108 for (ep = f->observations; ep; ep = ep->observations) { 3109 3110 if (ep->ffep != f->e0r) 3111 fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep); 3112 3113 pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp))); 3114 platform_units_translate(0, f->config, NULL, NULL, &detector, 3115 pathstr); 3116 FREE(pathstr); 3117 3118 /* add defect */ 3119 defect = fmd_nvl_create_fault(f->hdl, 3120 undiag_2defect_str(Undiag_reason), 50 / f->uniqobs, 3121 NULL, NULL, detector); 3122 (void) nvlist_add_string(defect, UNDIAG_REASON, reason); 3123 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, 3124 B_FALSE); 3125 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, 3126 B_FALSE); 3127 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 3128 3129 /* add fault if appropriate */ 3130 faultstr = undiag_2fault_str(Undiag_reason); 3131 if (faultstr == NULL) 3132 continue; 3133 fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs, 3134 NULL, NULL, detector); 3135 (void) nvlist_add_string(fault, UNDIAG_REASON, reason); 3136 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE, 3137 B_FALSE); 3138 (void) 
nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE, 3139 B_FALSE); 3140 fmd_case_add_suspect(f->hdl, f->fmcase, fault); 3141 nvlist_free(detector); 3142 } 3143 FREE(reason); 3144 fmd_case_solve(f->hdl, f->fmcase); 3145 fmd_case_close(f->hdl, f->fmcase); 3146 Undiag_reason = UD_VAL_UNKNOWN; 3147 } 3148 3149 /* 3150 * fme_close_case 3151 * 3152 * Find the requested case amongst our fmes and close it. Free up 3153 * the related fme. 3154 */ 3155 void 3156 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 3157 { 3158 struct case_list *ucasep, *prevcasep = NULL; 3159 struct fme *prev = NULL; 3160 struct fme *fmep; 3161 3162 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 3163 if (fmcase != ucasep->fmcase) { 3164 prevcasep = ucasep; 3165 continue; 3166 } 3167 3168 if (prevcasep == NULL) 3169 Undiagablecaselist = Undiagablecaselist->next; 3170 else 3171 prevcasep->next = ucasep->next; 3172 3173 FREE(ucasep); 3174 return; 3175 } 3176 3177 for (fmep = FMElist; fmep; fmep = fmep->next) { 3178 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 3179 break; 3180 prev = fmep; 3181 } 3182 3183 if (fmep == NULL) { 3184 out(O_WARN, "Eft asked to close unrecognized case [%s].", 3185 fmd_case_uuid(hdl, fmcase)); 3186 return; 3187 } 3188 3189 if (EFMElist == fmep) 3190 EFMElist = prev; 3191 3192 if (prev == NULL) 3193 FMElist = FMElist->next; 3194 else 3195 prev->next = fmep->next; 3196 3197 fmep->next = NULL; 3198 3199 /* Get rid of any timer this fme has set */ 3200 if (fmep->wull != 0) 3201 fmd_timer_remove(fmep->hdl, fmep->timer); 3202 3203 if (ClosedFMEs == NULL) { 3204 ClosedFMEs = fmep; 3205 } else { 3206 fmep->next = ClosedFMEs; 3207 ClosedFMEs = fmep; 3208 } 3209 3210 Open_fme_count--; 3211 3212 /* See if we can close the overflow FME */ 3213 if (Open_fme_count <= Max_fme) { 3214 for (fmep = FMElist; fmep; fmep = fmep->next) { 3215 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 3216 fmep->fmcase))) 3217 break; 3218 } 3219 3220 if (fmep != NULL) 3221 fmd_case_close(fmep->hdl, fmep->fmcase); 3222 } 3223 } 3224 3225 /* 3226 * fme_set_timer() 3227 * If the time we need to wait for the given FME is less than the 3228 * current timer, kick that old timer out and establish a new one. 
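 * Returns 1 if a new timer was installed, 0 if the time already waited or
 * the currently established timer covers the request.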
3229 */ 3230 static int 3231 fme_set_timer(struct fme *fmep, unsigned long long wull) 3232 { 3233 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 3234 ptree_timeval(O_ALTFP|O_VERB, &wull); 3235 3236 if (wull <= fmep->pull) { 3237 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 3238 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 3239 out(O_ALTFP|O_VERB, NULL); 3240 /* we've waited at least wull already, don't need timer */ 3241 return (0); 3242 } 3243 3244 out(O_ALTFP|O_VERB|O_NONL, " currently "); 3245 if (fmep->wull != 0) { 3246 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 3247 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 3248 out(O_ALTFP|O_VERB, NULL); 3249 } else { 3250 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 3251 out(O_ALTFP|O_VERB, NULL); 3252 } 3253 3254 if (fmep->wull != 0) 3255 if (wull >= fmep->wull) 3256 /* New timer would fire later than established timer */ 3257 return (0); 3258 3259 if (fmep->wull != 0) { 3260 fmd_timer_remove(fmep->hdl, fmep->timer); 3261 } 3262 3263 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 3264 fmep->e0r, wull); 3265 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 3266 fmep->wull = wull; 3267 return (1); 3268 } 3269 3270 void 3271 fme_timer_fired(struct fme *fmep, id_t tid) 3272 { 3273 struct fme *ffmep = NULL; 3274 3275 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 3276 if (ffmep == fmep) 3277 break; 3278 3279 if (ffmep == NULL) { 3280 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 3281 (void *)fmep); 3282 return; 3283 } 3284 3285 out(O_ALTFP|O_VERB, "Timer fired %lx", tid); 3286 fmep->pull = fmep->wull; 3287 fmep->wull = 0; 3288 fmd_buf_write(fmep->hdl, fmep->fmcase, 3289 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 3290 3291 fme_eval(fmep, fmep->e0r); 3292 } 3293 3294 /* 3295 * Preserve the fme's suspect list in its psuspects list, NULLing the 3296 * suspects list in the meantime. 3297 */ 3298 static void 3299 save_suspects(struct fme *fmep) 3300 { 3301 struct event *ep; 3302 struct event *nextep; 3303 3304 /* zero out the previous suspect list */ 3305 for (ep = fmep->psuspects; ep; ep = nextep) { 3306 nextep = ep->psuspects; 3307 ep->psuspects = NULL; 3308 } 3309 fmep->psuspects = NULL; 3310 3311 /* zero out the suspect list, copying it to previous suspect list */ 3312 fmep->psuspects = fmep->suspects; 3313 for (ep = fmep->suspects; ep; ep = nextep) { 3314 nextep = ep->suspects; 3315 ep->psuspects = ep->suspects; 3316 ep->suspects = NULL; 3317 ep->is_suspect = 0; 3318 } 3319 fmep->suspects = NULL; 3320 fmep->nsuspects = 0; 3321 } 3322 3323 /* 3324 * Retrieve the fme's suspect list from its psuspects list. 
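 * This is the inverse of save_suspects(): the psuspects links are copied
 * back onto the suspects links and nsuspects is recounted.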
3325 */ 3326 static void 3327 restore_suspects(struct fme *fmep) 3328 { 3329 struct event *ep; 3330 struct event *nextep; 3331 3332 fmep->nsuspects = 0; 3333 fmep->suspects = fmep->psuspects; 3334 for (ep = fmep->psuspects; ep; ep = nextep) { 3335 fmep->nsuspects++; 3336 nextep = ep->psuspects; 3337 ep->suspects = ep->psuspects; 3338 } 3339 } 3340 3341 /* 3342 * this is what we use to call the Emrys prototype code instead of main() 3343 */ 3344 static void 3345 fme_eval(struct fme *fmep, fmd_event_t *ffep) 3346 { 3347 struct event *ep; 3348 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 3349 struct rsl *srl = NULL; 3350 struct rsl *srl2 = NULL; 3351 int mess_zero_count; 3352 int rpcnt; 3353 3354 save_suspects(fmep); 3355 3356 out(O_ALTFP, "Evaluate FME %d", fmep->id); 3357 indent_set(" "); 3358 3359 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 3360 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 3361 3362 out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 3363 fme_state2str(fmep->state)); 3364 for (ep = fmep->suspects; ep; ep = ep->suspects) { 3365 out(O_ALTFP|O_NONL, " "); 3366 itree_pevent_brief(O_ALTFP|O_NONL, ep); 3367 } 3368 out(O_ALTFP, NULL); 3369 3370 switch (fmep->state) { 3371 case FME_CREDIBLE: 3372 print_suspects(SLNEW, fmep); 3373 (void) upsets_eval(fmep, ffep); 3374 3375 /* 3376 * we may have already posted suspects in upsets_eval() which 3377 * can recurse into fme_eval() again. If so then just return. 3378 */ 3379 if (fmep->posted_suspects) 3380 return; 3381 3382 stats_counter_bump(fmep->diags); 3383 rpcnt = fmep->nsuspects; 3384 save_suspects(fmep); 3385 3386 /* 3387 * create two lists, one for "message=1" faults and one for 3388 * "message=0" faults. If we have a mixture we will generate 3389 * two separate suspect lists. 3390 */ 3391 srl = MALLOC(rpcnt * sizeof (struct rsl)); 3392 bzero(srl, rpcnt * sizeof (struct rsl)); 3393 srl2 = MALLOC(rpcnt * sizeof (struct rsl)); 3394 bzero(srl2, rpcnt * sizeof (struct rsl)); 3395 mess_zero_count = trim_suspects(fmep, srl, srl2, ffep); 3396 3397 /* 3398 * If the resulting suspect list has no members, we're 3399 * done so simply close the case. Otherwise sort and publish. 3400 */ 3401 if (fmep->nsuspects == 0 && mess_zero_count == 0) { 3402 out(O_ALTFP, 3403 "[FME%d, case %s (all suspects are upsets)]", 3404 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 3405 fmd_case_close(fmep->hdl, fmep->fmcase); 3406 } else if (fmep->nsuspects != 0 && mess_zero_count == 0) { 3407 publish_suspects(fmep, srl); 3408 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 3409 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 3410 fmd_case_solve(fmep->hdl, fmep->fmcase); 3411 } else if (fmep->nsuspects == 0 && mess_zero_count != 0) { 3412 fmep->nsuspects = mess_zero_count; 3413 publish_suspects(fmep, srl2); 3414 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 3415 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 3416 fmd_case_solve(fmep->hdl, fmep->fmcase); 3417 } else { 3418 struct event *obsp; 3419 struct fme *nfmep; 3420 3421 publish_suspects(fmep, srl); 3422 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 3423 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 3424 fmd_case_solve(fmep->hdl, fmep->fmcase); 3425 3426 /* 3427 * Got both message=0 and message=1 so create a 3428 * duplicate case. Also need a temporary duplicate fme 3429 * structure for use by publish_suspects(). 
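			 * The duplicate is freed directly with FREE() below
			 * rather than via destroy_fme(), since it exists only
			 * long enough to publish the second suspect list.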
3430 */ 3431 nfmep = alloc_fme(); 3432 nfmep->id = Nextid++; 3433 nfmep->hdl = fmep->hdl; 3434 nfmep->nsuspects = mess_zero_count; 3435 nfmep->fmcase = fmd_case_open(fmep->hdl, NULL); 3436 out(O_ALTFP|O_STAMP, 3437 "[creating parallel FME%d, case %s]", nfmep->id, 3438 fmd_case_uuid(nfmep->hdl, nfmep->fmcase)); 3439 Open_fme_count++; 3440 if (ffep) { 3441 fmd_case_setprincipal(nfmep->hdl, 3442 nfmep->fmcase, ffep); 3443 fmd_case_add_ereport(nfmep->hdl, 3444 nfmep->fmcase, ffep); 3445 } 3446 for (obsp = fmep->observations; obsp; 3447 obsp = obsp->observations) 3448 if (obsp->ffep && obsp->ffep != ffep) 3449 fmd_case_add_ereport(nfmep->hdl, 3450 nfmep->fmcase, obsp->ffep); 3451 3452 publish_suspects(nfmep, srl2); 3453 out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id, 3454 fmd_case_uuid(nfmep->hdl, nfmep->fmcase)); 3455 fmd_case_solve(nfmep->hdl, nfmep->fmcase); 3456 FREE(nfmep); 3457 } 3458 FREE(srl); 3459 FREE(srl2); 3460 restore_suspects(fmep); 3461 3462 fmep->posted_suspects = 1; 3463 fmd_buf_write(fmep->hdl, fmep->fmcase, 3464 WOBUF_POSTD, 3465 (void *)&fmep->posted_suspects, 3466 sizeof (fmep->posted_suspects)); 3467 3468 /* 3469 * Now the suspects have been posted, we can clear up 3470 * the instance tree as we won't be looking at it again. 3471 * Also cancel the timer as the case is now solved. 3472 */ 3473 if (fmep->wull != 0) { 3474 fmd_timer_remove(fmep->hdl, fmep->timer); 3475 fmep->wull = 0; 3476 } 3477 break; 3478 3479 case FME_WAIT: 3480 ASSERT(my_delay > fmep->ull); 3481 (void) fme_set_timer(fmep, my_delay); 3482 print_suspects(SLWAIT, fmep); 3483 itree_prune(fmep->eventtree); 3484 return; 3485 3486 case FME_DISPROVED: 3487 print_suspects(SLDISPROVED, fmep); 3488 Undiag_reason = UD_VAL_UNSOLVD; 3489 fme_undiagnosable(fmep); 3490 break; 3491 } 3492 3493 itree_free(fmep->eventtree); 3494 fmep->eventtree = NULL; 3495 structconfig_free(fmep->config); 3496 fmep->config = NULL; 3497 destroy_fme_bufs(fmep); 3498 } 3499 3500 static void indent(void); 3501 static int triggered(struct fme *fmep, struct event *ep, int mark); 3502 static enum fme_state effects_test(struct fme *fmep, 3503 struct event *fault_event, unsigned long long at_latest_by, 3504 unsigned long long *pdelay); 3505 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 3506 unsigned long long at_latest_by, unsigned long long *pdelay); 3507 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 3508 unsigned long long at_latest_by, unsigned long long *pdelay); 3509 3510 static int 3511 checkconstraints(struct fme *fmep, struct arrow *arrowp) 3512 { 3513 struct constraintlist *ctp; 3514 struct evalue value; 3515 char *sep = ""; 3516 3517 if (arrowp->forever_false) { 3518 indent(); 3519 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: "); 3520 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 3521 out(O_ALTFP|O_VERB|O_NONL, sep); 3522 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 3523 sep = ", "; 3524 } 3525 out(O_ALTFP|O_VERB, NULL); 3526 return (0); 3527 } 3528 if (arrowp->forever_true) { 3529 indent(); 3530 out(O_ALTFP|O_VERB|O_NONL, " Forever true constraint: "); 3531 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 3532 out(O_ALTFP|O_VERB|O_NONL, sep); 3533 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 3534 sep = ", "; 3535 } 3536 out(O_ALTFP|O_VERB, NULL); 3537 return (1); 3538 } 3539 3540 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 3541 if (eval_expr(ctp->cnode, NULL, NULL, 3542 &fmep->globals, fmep->config, 3543 arrowp, 
0, &value)) { 3544 /* evaluation successful */ 3545 if (value.t == UNDEFINED || value.v == 0) { 3546 /* known false */ 3547 arrowp->forever_false = 1; 3548 indent(); 3549 out(O_ALTFP|O_VERB|O_NONL, 3550 " False constraint: "); 3551 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 3552 out(O_ALTFP|O_VERB, NULL); 3553 return (0); 3554 } 3555 } else { 3556 /* evaluation unsuccessful -- unknown value */ 3557 indent(); 3558 out(O_ALTFP|O_VERB|O_NONL, 3559 " Deferred constraint: "); 3560 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 3561 out(O_ALTFP|O_VERB, NULL); 3562 return (1); 3563 } 3564 } 3565 /* known true */ 3566 arrowp->forever_true = 1; 3567 indent(); 3568 out(O_ALTFP|O_VERB|O_NONL, " True constraint: "); 3569 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 3570 out(O_ALTFP|O_VERB|O_NONL, sep); 3571 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 3572 sep = ", "; 3573 } 3574 out(O_ALTFP|O_VERB, NULL); 3575 return (1); 3576 } 3577 3578 static int 3579 triggered(struct fme *fmep, struct event *ep, int mark) 3580 { 3581 struct bubble *bp; 3582 struct arrowlist *ap; 3583 int count = 0; 3584 3585 stats_counter_bump(fmep->Tcallcount); 3586 for (bp = itree_next_bubble(ep, NULL); bp; 3587 bp = itree_next_bubble(ep, bp)) { 3588 if (bp->t != B_TO) 3589 continue; 3590 for (ap = itree_next_arrow(bp, NULL); ap; 3591 ap = itree_next_arrow(bp, ap)) { 3592 /* check count of marks against K in the bubble */ 3593 if ((ap->arrowp->mark & mark) && 3594 ++count >= bp->nork) 3595 return (1); 3596 } 3597 } 3598 return (0); 3599 } 3600 3601 static int 3602 mark_arrows(struct fme *fmep, struct event *ep, int mark, 3603 unsigned long long at_latest_by, unsigned long long *pdelay, int keep) 3604 { 3605 struct bubble *bp; 3606 struct arrowlist *ap; 3607 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3608 unsigned long long my_delay; 3609 enum fme_state result; 3610 int retval = 0; 3611 3612 for (bp = itree_next_bubble(ep, NULL); bp; 3613 bp = itree_next_bubble(ep, bp)) { 3614 if (bp->t != B_FROM) 3615 continue; 3616 stats_counter_bump(fmep->Marrowcount); 3617 for (ap = itree_next_arrow(bp, NULL); ap; 3618 ap = itree_next_arrow(bp, ap)) { 3619 struct event *ep2 = ap->arrowp->head->myevent; 3620 /* 3621 * if we're clearing marks, we can avoid doing 3622 * all that work evaluating constraints. 
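			 * A mark of 0 is the cleanup pass run from
			 * effects_test(): just clear arrow_marked and the
			 * cached effect state, recursing to do the same for
			 * downstream events.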
3623 */ 3624 if (mark == 0) { 3625 if (ap->arrowp->arrow_marked == 0) 3626 continue; 3627 ap->arrowp->arrow_marked = 0; 3628 ap->arrowp->mark &= ~EFFECTS_COUNTER; 3629 if (keep && (ep2->cached_state & 3630 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT))) 3631 ep2->keep_in_tree = 1; 3632 ep2->cached_state &= 3633 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT); 3634 (void) mark_arrows(fmep, ep2, mark, 0, NULL, 3635 keep); 3636 continue; 3637 } 3638 ap->arrowp->arrow_marked = 1; 3639 if (ep2->cached_state & REQMNTS_DISPROVED) { 3640 indent(); 3641 out(O_ALTFP|O_VERB|O_NONL, 3642 " ALREADY DISPROVED "); 3643 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3644 out(O_ALTFP|O_VERB, NULL); 3645 continue; 3646 } 3647 if (ep2->cached_state & WAIT_EFFECT) { 3648 indent(); 3649 out(O_ALTFP|O_VERB|O_NONL, 3650 " ALREADY EFFECTS WAIT "); 3651 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3652 out(O_ALTFP|O_VERB, NULL); 3653 continue; 3654 } 3655 if (ep2->cached_state & CREDIBLE_EFFECT) { 3656 indent(); 3657 out(O_ALTFP|O_VERB|O_NONL, 3658 " ALREADY EFFECTS CREDIBLE "); 3659 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3660 out(O_ALTFP|O_VERB, NULL); 3661 continue; 3662 } 3663 if ((ep2->cached_state & PARENT_WAIT) && 3664 (mark & PARENT_WAIT)) { 3665 indent(); 3666 out(O_ALTFP|O_VERB|O_NONL, 3667 " ALREADY PARENT EFFECTS WAIT "); 3668 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3669 out(O_ALTFP|O_VERB, NULL); 3670 continue; 3671 } 3672 platform_set_payloadnvp(ep2->nvp); 3673 if (checkconstraints(fmep, ap->arrowp) == 0) { 3674 platform_set_payloadnvp(NULL); 3675 indent(); 3676 out(O_ALTFP|O_VERB|O_NONL, 3677 " CONSTRAINTS FAIL "); 3678 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3679 out(O_ALTFP|O_VERB, NULL); 3680 continue; 3681 } 3682 platform_set_payloadnvp(NULL); 3683 ap->arrowp->mark |= EFFECTS_COUNTER; 3684 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) { 3685 indent(); 3686 out(O_ALTFP|O_VERB|O_NONL, 3687 " K-COUNT NOT YET MET "); 3688 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3689 out(O_ALTFP|O_VERB, NULL); 3690 continue; 3691 } 3692 ep2->cached_state &= ~PARENT_WAIT; 3693 /* 3694 * if we've reached an ereport and no propagation time 3695 * is specified, use the Hesitate value 3696 */ 3697 if (ep2->t == N_EREPORT && at_latest_by == 0ULL && 3698 ap->arrowp->maxdelay == 0ULL) { 3699 out(O_ALTFP|O_VERB|O_NONL, " default wait "); 3700 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3701 out(O_ALTFP|O_VERB, NULL); 3702 result = requirements_test(fmep, ep2, Hesitate, 3703 &my_delay); 3704 } else { 3705 result = requirements_test(fmep, ep2, 3706 at_latest_by + ap->arrowp->maxdelay, 3707 &my_delay); 3708 } 3709 if (result == FME_WAIT) { 3710 retval = WAIT_EFFECT; 3711 if (overall_delay > my_delay) 3712 overall_delay = my_delay; 3713 ep2->cached_state |= WAIT_EFFECT; 3714 indent(); 3715 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT "); 3716 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3717 out(O_ALTFP|O_VERB, NULL); 3718 indent_push(" E"); 3719 if (mark_arrows(fmep, ep2, PARENT_WAIT, 3720 at_latest_by, &my_delay, 0) == 3721 WAIT_EFFECT) { 3722 retval = WAIT_EFFECT; 3723 if (overall_delay > my_delay) 3724 overall_delay = my_delay; 3725 } 3726 indent_pop(); 3727 } else if (result == FME_DISPROVED) { 3728 indent(); 3729 out(O_ALTFP|O_VERB|O_NONL, 3730 " EFFECTS DISPROVED "); 3731 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3732 out(O_ALTFP|O_VERB, NULL); 3733 } else { 3734 ep2->cached_state |= mark; 3735 indent(); 3736 if (mark == CREDIBLE_EFFECT) 3737 out(O_ALTFP|O_VERB|O_NONL, 3738 " EFFECTS 
CREDIBLE "); 3739 else 3740 out(O_ALTFP|O_VERB|O_NONL, 3741 " PARENT EFFECTS WAIT "); 3742 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 3743 out(O_ALTFP|O_VERB, NULL); 3744 indent_push(" E"); 3745 if (mark_arrows(fmep, ep2, mark, at_latest_by, 3746 &my_delay, 0) == WAIT_EFFECT) { 3747 retval = WAIT_EFFECT; 3748 if (overall_delay > my_delay) 3749 overall_delay = my_delay; 3750 } 3751 indent_pop(); 3752 } 3753 } 3754 } 3755 if (retval == WAIT_EFFECT) 3756 *pdelay = overall_delay; 3757 return (retval); 3758 } 3759 3760 static enum fme_state 3761 effects_test(struct fme *fmep, struct event *fault_event, 3762 unsigned long long at_latest_by, unsigned long long *pdelay) 3763 { 3764 struct event *error_event; 3765 enum fme_state return_value = FME_CREDIBLE; 3766 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3767 unsigned long long my_delay; 3768 3769 stats_counter_bump(fmep->Ecallcount); 3770 indent_push(" E"); 3771 indent(); 3772 out(O_ALTFP|O_VERB|O_NONL, "->"); 3773 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 3774 out(O_ALTFP|O_VERB, NULL); 3775 3776 if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by, 3777 &my_delay, 0) == WAIT_EFFECT) { 3778 return_value = FME_WAIT; 3779 if (overall_delay > my_delay) 3780 overall_delay = my_delay; 3781 } 3782 for (error_event = fmep->observations; 3783 error_event; error_event = error_event->observations) { 3784 indent(); 3785 out(O_ALTFP|O_VERB|O_NONL, " "); 3786 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 3787 if (!(error_event->cached_state & CREDIBLE_EFFECT)) { 3788 if (error_event->cached_state & 3789 (PARENT_WAIT|WAIT_EFFECT)) { 3790 out(O_ALTFP|O_VERB, " NOT YET triggered"); 3791 continue; 3792 } 3793 return_value = FME_DISPROVED; 3794 out(O_ALTFP|O_VERB, " NOT triggered"); 3795 break; 3796 } else { 3797 out(O_ALTFP|O_VERB, " triggered"); 3798 } 3799 } 3800 if (return_value == FME_DISPROVED) { 3801 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0); 3802 } else { 3803 fault_event->keep_in_tree = 1; 3804 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1); 3805 } 3806 3807 indent(); 3808 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ", 3809 fme_state2str(return_value)); 3810 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 3811 out(O_ALTFP|O_VERB, NULL); 3812 indent_pop(); 3813 if (return_value == FME_WAIT) 3814 *pdelay = overall_delay; 3815 return (return_value); 3816 } 3817 3818 static enum fme_state 3819 requirements_test(struct fme *fmep, struct event *ep, 3820 unsigned long long at_latest_by, unsigned long long *pdelay) 3821 { 3822 int waiting_events; 3823 int credible_events; 3824 int deferred_events; 3825 enum fme_state return_value = FME_CREDIBLE; 3826 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3827 unsigned long long arrow_delay; 3828 unsigned long long my_delay; 3829 struct event *ep2; 3830 struct bubble *bp; 3831 struct arrowlist *ap; 3832 3833 if (ep->cached_state & REQMNTS_CREDIBLE) { 3834 indent(); 3835 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 3836 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3837 out(O_ALTFP|O_VERB, NULL); 3838 return (FME_CREDIBLE); 3839 } 3840 if (ep->cached_state & REQMNTS_DISPROVED) { 3841 indent(); 3842 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 3843 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3844 out(O_ALTFP|O_VERB, NULL); 3845 return (FME_DISPROVED); 3846 } 3847 if (ep->cached_state & REQMNTS_WAIT) { 3848 indent(); 3849 *pdelay = ep->cached_delay; 3850 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 3851 
itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3852 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 3853 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3854 out(O_ALTFP|O_VERB, NULL); 3855 return (FME_WAIT); 3856 } 3857 stats_counter_bump(fmep->Rcallcount); 3858 indent_push(" R"); 3859 indent(); 3860 out(O_ALTFP|O_VERB|O_NONL, "->"); 3861 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3862 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3863 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3864 out(O_ALTFP|O_VERB, NULL); 3865 3866 if (ep->t == N_EREPORT) { 3867 if (ep->count == 0) { 3868 if (fmep->pull >= at_latest_by) { 3869 return_value = FME_DISPROVED; 3870 } else { 3871 ep->cached_delay = *pdelay = at_latest_by; 3872 return_value = FME_WAIT; 3873 } 3874 } 3875 3876 indent(); 3877 switch (return_value) { 3878 case FME_CREDIBLE: 3879 ep->cached_state |= REQMNTS_CREDIBLE; 3880 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 3881 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3882 break; 3883 case FME_DISPROVED: 3884 ep->cached_state |= REQMNTS_DISPROVED; 3885 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3886 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3887 break; 3888 case FME_WAIT: 3889 ep->cached_state |= REQMNTS_WAIT; 3890 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 3891 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3892 out(O_ALTFP|O_VERB|O_NONL, " to "); 3893 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3894 break; 3895 default: 3896 out(O_DIE, "requirements_test: unexpected fme_state"); 3897 break; 3898 } 3899 out(O_ALTFP|O_VERB, NULL); 3900 indent_pop(); 3901 3902 return (return_value); 3903 } 3904 3905 /* this event is not a report, descend the tree */ 3906 for (bp = itree_next_bubble(ep, NULL); bp; 3907 bp = itree_next_bubble(ep, bp)) { 3908 int n; 3909 3910 if (bp->t != B_FROM) 3911 continue; 3912 3913 n = bp->nork; 3914 3915 credible_events = 0; 3916 waiting_events = 0; 3917 deferred_events = 0; 3918 arrow_delay = TIMEVAL_EVENTUALLY; 3919 /* 3920 * n is -1 for 'A' so adjust it. 3921 * XXX just count up the arrows for now. 3922 */ 3923 if (n < 0) { 3924 n = 0; 3925 for (ap = itree_next_arrow(bp, NULL); ap; 3926 ap = itree_next_arrow(bp, ap)) 3927 n++; 3928 indent(); 3929 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 3930 } else { 3931 indent(); 3932 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 3933 } 3934 3935 if (n == 0) 3936 continue; 3937 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 3938 for (ap = itree_next_arrow(bp, NULL); ap; 3939 ap = itree_next_arrow(bp, ap)) { 3940 ep2 = ap->arrowp->head->myevent; 3941 platform_set_payloadnvp(ep2->nvp); 3942 (void) checkconstraints(fmep, ap->arrowp); 3943 if (!ap->arrowp->forever_false) { 3944 /* 3945 * if all arrows are invalidated by the 3946 * constraints, then we should elide the 3947 * whole bubble to be consistant with 3948 * the tree creation time behaviour 3949 */ 3950 bp->mark |= BUBBLE_OK; 3951 platform_set_payloadnvp(NULL); 3952 break; 3953 } 3954 platform_set_payloadnvp(NULL); 3955 } 3956 } 3957 for (ap = itree_next_arrow(bp, NULL); ap; 3958 ap = itree_next_arrow(bp, ap)) { 3959 ep2 = ap->arrowp->head->myevent; 3960 if (n <= credible_events) 3961 break; 3962 3963 ap->arrowp->mark |= REQMNTS_COUNTER; 3964 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 3965 /* XXX adding max timevals! 
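			 * (at_latest_by plus each arrow's maxdelay keeps
			 * accumulating as we descend)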
				switch (requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay)) {
				case FME_DEFERRED:
					deferred_events++;
					break;
				case FME_CREDIBLE:
					credible_events++;
					break;
				case FME_DISPROVED:
					break;
				case FME_WAIT:
					if (my_delay < arrow_delay)
						arrow_delay = my_delay;
					waiting_events++;
					break;
				default:
					out(O_DIE,
					    "Bug in requirements_test.");
				}
			else
				deferred_events++;
		}
		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
			bp->mark |= BUBBLE_ELIDED;
			continue;
		}
		indent();
		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
		    credible_events + deferred_events, waiting_events);
		if (credible_events + deferred_events + waiting_events < n) {
			/* Can never meet requirements */
			ep->cached_state |= REQMNTS_DISPROVED;
			indent();
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB, NULL);
			indent_pop();
			return (FME_DISPROVED);
		}
		if (credible_events + deferred_events < n) {
			/* will have to wait */
			/* wait time is shortest known */
			if (arrow_delay < overall_delay)
				overall_delay = arrow_delay;
			return_value = FME_WAIT;
		} else if (credible_events < n) {
			if (return_value != FME_WAIT)
				return_value = FME_DEFERRED;
		}
	}

	/*
	 * don't mark as FME_DEFERRED. If this event isn't reached by another
	 * path, then this will be considered FME_CREDIBLE. But if it is
	 * reached by a different path so that the K-count is met, then it
	 * might get overridden by FME_WAIT or FME_DISPROVED.
	 */
	if (return_value == FME_WAIT) {
		ep->cached_state |= REQMNTS_WAIT;
		ep->cached_delay = *pdelay = overall_delay;
	} else if (return_value == FME_CREDIBLE) {
		ep->cached_state |= REQMNTS_CREDIBLE;
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (return_value);
}

/*
 * causes_test -- check that at least K of the possible causes of event
 * ep (the tails of its "to" bubbles) are hypothesised to be credible,
 * or may still become credible within the time limit.
 */
static enum fme_state
causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	int credible_results = 0;
	int waiting_results = 0;
	enum fme_state fstate;
	struct event *tail_event;
	struct bubble *bp;
	struct arrowlist *ap;
	int k = 1;

	stats_counter_bump(fmep->Ccallcount);
	indent_push(" C");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		k = bp->nork;	/* remember the K value */
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			int do_not_follow = 0;

			/*
			 * if we get to the same event multiple times
			 * only worry about the first one.
			 */
			if (ap->arrowp->tail->myevent->cached_state &
			    CAUSES_TESTED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " causes test already run for ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			/*
			 * see if false constraint prevents us
			 * from traversing this arrow
			 */
			platform_set_payloadnvp(ep->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0)
				do_not_follow = 1;
			platform_set_payloadnvp(NULL);
			if (do_not_follow) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    " False arrow from ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			ap->arrowp->tail->myevent->cached_state |=
			    CAUSES_TESTED;
			tail_event = ap->arrowp->tail->myevent;
			fstate = hypothesise(fmep, tail_event, at_latest_by,
			    &my_delay);

			switch (fstate) {
			case FME_WAIT:
				if (my_delay < overall_delay)
					overall_delay = my_delay;
				waiting_results++;
				break;
			case FME_CREDIBLE:
				credible_results++;
				break;
			case FME_DISPROVED:
				break;
			default:
				out(O_DIE, "Bug in causes_test");
			}
		}
	}
	/* compare against K */
	if (credible_results + waiting_results < k) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_DISPROVED);
	}
	if (waiting_results != 0) {
		*pdelay = overall_delay;
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}

/*
 * hypothesise -- recursive heart of the inference algorithm.  An event is
 * credible if it meets its propagation requirements and, for a problem
 * event, explains the observed reports (effects_test), or, for an
 * intermediate event, has a credible cause (causes_test).  A problem
 * whose effects are not disproved is added to the FME's suspect list.
 */
static enum fme_state
hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	enum fme_state rtr, otr;
	unsigned long long my_delay;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;

	stats_counter_bump(fmep->Hcallcount);
	indent_push(" H");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
		overall_delay = my_delay;
	if (rtr != FME_DISPROVED) {
		if (is_problem(ep->t)) {
			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
			if (otr != FME_DISPROVED) {
				if (fmep->peek == 0 && ep->is_suspect == 0) {
					ep->suspects = fmep->suspects;
					ep->is_suspect = 1;
					fmep->suspects = ep;
					fmep->nsuspects++;
				}
			}
		} else
			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
		if ((otr == FME_WAIT) && (my_delay < overall_delay))
			overall_delay = my_delay;
		if ((otr != FME_DISPROVED) &&
		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
			*pdelay = overall_delay;
	}
	if (rtr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if (otr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (causes are not credible)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}

/*
 * fme_istat_load -- reconstitute any persistent istats
 */
void
fme_istat_load(fmd_hdl_t *hdl)
{
	int sz;
	char *sbuf;
	char *ptr;

	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
		out(O_ALTFP, "fme_istat_load: No stats");
		return;
	}

	sbuf = alloca(sz);

	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);

	/*
	 * pick apart the serialized stats
	 *
	 * format is:
	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
	 * for example:
	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
	 *
	 * since this is parsing our own serialized data, any parsing issues
	 * are fatal, so we check for them all with ASSERT() below.
	 */
	ptr = sbuf;
	while (ptr < &sbuf[sz]) {
		char *sepptr;
		struct node *np;
		int val;

		sepptr = strchr(ptr, '@');
		ASSERT(sepptr != NULL);
		*sepptr = '\0';

		/* construct the event */
		np = newnode(T_EVENT, NULL, 0);
		np->u.event.ename = newnode(T_NAME, NULL, 0);
		np->u.event.ename->u.name.t = N_STAT;
		np->u.event.ename->u.name.s = stable(ptr);
		np->u.event.ename->u.name.it = IT_ENAME;
		np->u.event.ename->u.name.last = np->u.event.ename;

		ptr = sepptr + 1;
		ASSERT(ptr < &sbuf[sz]);
		ptr += strlen(ptr);
		ptr++;	/* move past the '\0' separating path from value */
		ASSERT(ptr < &sbuf[sz]);
		ASSERT(isdigit(*ptr));
		val = atoi(ptr);
		ASSERT(val > 0);
		ptr += strlen(ptr);
		ptr++;	/* move past the final '\0' for this entry */

		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
		ASSERT(np->u.event.epname != NULL);

		istat_bump(np, val);
		tree_free(np);
	}

	istat_save();
}
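/*
 * For reference, the sample buffer shown in the comment in
 * fme_istat_load() above,
 *
 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
 *
 * decodes into two istat entries: class name "stat.first" with instanced
 * path "stat0/path0" and value 2, and class name "stat.second" with
 * instanced path "stat0/path1" and value 23.  The '@' separates each class
 * name from its path; the first '\0' terminates the path and the second
 * terminates the ASCII value for that entry.
 */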