1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/fm/protocol.h> 30 31 #include <fmd_api.h> 32 #include <fmd_subr.h> 33 #include <fmd_string.h> 34 #include <fmd_protocol.h> 35 #include <fmd_module.h> 36 #include <fmd_error.h> 37 38 static struct { 39 fmd_stat_t nosub; 40 fmd_stat_t module; 41 } self_stats = { 42 { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" }, 43 { "module", FMD_TYPE_UINT64, "error events received from fmd modules" }, 44 }; 45 46 typedef struct self_case { 47 enum { SC_CLASS, SC_MODULE } sc_kind; 48 char *sc_name; 49 } self_case_t; 50 51 static self_case_t * 52 self_case_create(fmd_hdl_t *hdl, int kind, const char *name) 53 { 54 self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP); 55 56 scp->sc_kind = kind; 57 scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP); 58 59 return (scp); 60 } 61 62 static void 63 self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp) 64 { 65 fmd_hdl_strfree(hdl, scp->sc_name); 66 fmd_hdl_free(hdl, scp, sizeof (self_case_t)); 67 } 68 69 static fmd_case_t * 70 self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name) 71 { 72 fmd_case_t *cp = NULL; 73 74 while ((cp = fmd_case_next(hdl, cp)) != NULL) { 75 self_case_t *scp = fmd_case_getspecific(hdl, cp); 76 if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0) 77 break; 78 } 79 80 return (cp); 81 } 82 83 /*ARGSUSED*/ 84 static void 85 self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 86 { 87 fmd_case_t *cp; 88 nvlist_t *flt, *mod; 89 char *name; 90 int err = 0; 91 92 /* 93 * If we get an error report from another fmd module, then create a 94 * case for the module and add the ereport to it. The error is either 95 * from fmd_hdl_error() or from fmd_api_error(). If it is the latter, 96 * fmd_module_error() will send another event of class EFMD_MOD_FAIL 97 * when the module has failed, at which point we can solve the case. 98 * We can also close the case on EFMD_MOD_CONF (bad config file). 99 */ 100 if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 && 101 nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 && 102 nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) { 103 104 if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) { 105 cp = fmd_case_open(hdl, 106 self_case_create(hdl, SC_MODULE, name)); 107 } 108 109 fmd_case_add_ereport(hdl, cp, ep); 110 self_stats.module.fmds_value.ui64++; 111 (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err); 112 113 if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF) 114 return; /* module is still active, so keep case open */ 115 116 if (fmd_case_solved(hdl, cp)) 117 return; /* case is already closed but error in _fini */ 118 119 class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF; 120 flt = fmd_protocol_fault(class, 100, mod, NULL, NULL); 121 122 fmd_case_add_suspect(hdl, cp, flt); 123 fmd_case_solve(hdl, cp); 124 fmd_case_convict(hdl, cp, flt); 125 126 return; 127 } 128 129 /* 130 * If we get an I/O DDI ereport, drop it for now until the I/O DE is 131 * implemented and integrated. Existing drivers in O/N have bugs that 132 * will trigger these and we don't want this producing FMD_FLT_NOSUB. 133 */ 134 if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0) 135 return; /* if we got a DDI ereport, drop it for now */ 136 137 /* 138 * If we get any other type of event then it is of a class for which 139 * there are no subscribers. Some of these correspond to internal fmd 140 * errors, which we ignore. Otherwise we keep one case per class and 141 * use it to produce a message indicating that something is awry. 142 */ 143 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) 144 return; /* if no agents are present just drop list.suspect */ 145 146 if (strncmp(class, FMD_ERR_CLASS, strlen(FMD_ERR_CLASS)) == 0) 147 return; /* if fmd itself produced the error just drop it */ 148 149 if (self_case_lookup(hdl, SC_CLASS, class) != NULL) 150 return; /* case is already open against this class */ 151 152 cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class)); 153 fmd_case_add_ereport(hdl, cp, ep); 154 self_stats.nosub.fmds_value.ui64++; 155 156 flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL); 157 fmd_case_add_suspect(hdl, cp, flt); 158 fmd_case_solve(hdl, cp); 159 } 160 161 static void 162 self_close(fmd_hdl_t *hdl, fmd_case_t *cp) 163 { 164 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); 165 } 166 167 static const fmd_hdl_ops_t self_ops = { 168 self_recv, /* fmdo_recv */ 169 NULL, /* fmdo_timeout */ 170 self_close, /* fmdo_close */ 171 NULL, /* fmdo_stats */ 172 NULL, /* fmdo_gc */ 173 }; 174 175 void 176 self_init(fmd_hdl_t *hdl) 177 { 178 fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */ 179 180 fmd_hdl_info_t info = { 181 "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL 182 }; 183 184 /* 185 * Unlike other modules, fmd-self-diagnosis has some special needs that 186 * fall outside of what we want in the module API. Manually disable 187 * checkpointing for this module by tweaking the mod_stats values. 188 * The self-diagnosis world relates to fmd's running state and modules 189 * which all change when it restarts, so don't bother w/ checkpointing. 190 */ 191 (void) pthread_mutex_lock(&mp->mod_stats_lock); 192 mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE; 193 mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE; 194 (void) pthread_mutex_unlock(&mp->mod_stats_lock); 195 196 if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0) 197 return; /* failed to register with fmd */ 198 199 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) / 200 sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats); 201 } 202 203 void 204 self_fini(fmd_hdl_t *hdl) 205 { 206 fmd_case_t *cp = NULL; 207 208 while ((cp = fmd_case_next(hdl, cp)) != NULL) 209 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); 210 } 211