/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include static struct { fmd_stat_t nosub; fmd_stat_t module; } self_stats = { { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" }, { "module", FMD_TYPE_UINT64, "error events received from fmd modules" }, }; typedef struct self_case { enum { SC_CLASS, SC_MODULE } sc_kind; char *sc_name; } self_case_t; static self_case_t * self_case_create(fmd_hdl_t *hdl, int kind, const char *name) { self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP); scp->sc_kind = kind; scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP); return (scp); } static void self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp) { fmd_hdl_strfree(hdl, scp->sc_name); fmd_hdl_free(hdl, scp, sizeof (self_case_t)); } static fmd_case_t * self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name) { fmd_case_t *cp = NULL; while ((cp = fmd_case_next(hdl, cp)) != NULL) { self_case_t *scp = fmd_case_getspecific(hdl, cp); if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0) break; } return (cp); } /*ARGSUSED*/ static void self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) { fmd_case_t *cp; nvlist_t *flt, *mod; char *name; int err = 0; /* * If we get an error report from another fmd module, then create a * case for the module and add the ereport to it. The error is either * from fmd_hdl_error() or from fmd_api_error(). If it is the latter, * fmd_module_error() will send another event of class EFMD_MOD_FAIL * when the module has failed, at which point we can solve the case. * We can also close the case on EFMD_MOD_CONF (bad config file). */ if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 && nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 && nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) { if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) { cp = fmd_case_open(hdl, self_case_create(hdl, SC_MODULE, name)); } fmd_case_add_ereport(hdl, cp, ep); self_stats.module.fmds_value.ui64++; (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err); if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF) return; /* module is still active, so keep case open */ if (fmd_case_solved(hdl, cp)) return; /* case is already closed but error in _fini */ class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF; flt = fmd_protocol_fault(class, 100, mod, NULL, NULL); fmd_case_add_suspect(hdl, cp, flt); fmd_case_solve(hdl, cp); fmd_case_convict(hdl, cp, flt); return; } /* * If we get an I/O DDI ereport, drop it for now until the I/O DE is * implemented and integrated. Existing drivers in O/N have bugs that * will trigger these and we don't want this producing FMD_FLT_NOSUB. */ if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0) return; /* if we got a DDI ereport, drop it for now */ /* * If we get any other type of event then it is of a class for which * there are no subscribers. Some of these correspond to internal fmd * errors, which we ignore. Otherwise we keep one case per class and * use it to produce a message indicating that something is awry. */ if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) return; /* if no agents are present just drop list.suspect */ if (strncmp(class, FMD_ERR_CLASS, strlen(FMD_ERR_CLASS)) == 0) return; /* if fmd itself produced the error just drop it */ if (self_case_lookup(hdl, SC_CLASS, class) != NULL) return; /* case is already open against this class */ cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class)); fmd_case_add_ereport(hdl, cp, ep); self_stats.nosub.fmds_value.ui64++; flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL); fmd_case_add_suspect(hdl, cp, flt); fmd_case_solve(hdl, cp); } static void self_close(fmd_hdl_t *hdl, fmd_case_t *cp) { self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); } static const fmd_hdl_ops_t self_ops = { self_recv, /* fmdo_recv */ NULL, /* fmdo_timeout */ self_close, /* fmdo_close */ NULL, /* fmdo_stats */ NULL, /* fmdo_gc */ }; void self_init(fmd_hdl_t *hdl) { fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */ fmd_hdl_info_t info = { "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL }; /* * Unlike other modules, fmd-self-diagnosis has some special needs that * fall outside of what we want in the module API. Manually disable * checkpointing for this module by tweaking the mod_stats values. * The self-diagnosis world relates to fmd's running state and modules * which all change when it restarts, so don't bother w/ checkpointing. */ (void) pthread_mutex_lock(&mp->mod_stats_lock); mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE; mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE; (void) pthread_mutex_unlock(&mp->mod_stats_lock); if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0) return; /* failed to register with fmd */ (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) / sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats); } void self_fini(fmd_hdl_t *hdl) { fmd_case_t *cp = NULL; while ((cp = fmd_case_next(hdl, cp)) != NULL) self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); }