1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/fm/protocol.h> 27 28 #include <fmd_api.h> 29 #include <fmd_subr.h> 30 #include <fmd_string.h> 31 #include <fmd_protocol.h> 32 #include <fmd_module.h> 33 #include <fmd_error.h> 34 35 static struct { 36 fmd_stat_t nosub; 37 fmd_stat_t module; 38 } self_stats = { 39 { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" }, 40 { "module", FMD_TYPE_UINT64, "error events received from fmd modules" }, 41 }; 42 43 typedef struct self_case { 44 enum { SC_CLASS, SC_MODULE } sc_kind; 45 char *sc_name; 46 } self_case_t; 47 48 static self_case_t * 49 self_case_create(fmd_hdl_t *hdl, int kind, const char *name) 50 { 51 self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP); 52 53 scp->sc_kind = kind; 54 scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP); 55 56 return (scp); 57 } 58 59 static void 60 self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp) 61 { 62 fmd_hdl_strfree(hdl, scp->sc_name); 63 fmd_hdl_free(hdl, scp, sizeof (self_case_t)); 64 } 65 66 static fmd_case_t * 67 self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name) 68 { 69 fmd_case_t *cp = NULL; 70 71 while ((cp = fmd_case_next(hdl, cp)) != NULL) { 72 self_case_t *scp = fmd_case_getspecific(hdl, cp); 73 if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0) 74 break; 75 } 76 77 return (cp); 78 } 79 80 /*ARGSUSED*/ 81 static void 82 self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 83 { 84 fmd_case_t *cp; 85 nvlist_t *flt, *mod; 86 char *name; 87 int err = 0; 88 89 /* 90 * If we get an error report from another fmd module, then create a 91 * case for the module and add the ereport to it. The error is either 92 * from fmd_hdl_error() or from fmd_api_error(). If it is the latter, 93 * fmd_module_error() will send another event of class EFMD_MOD_FAIL 94 * when the module has failed, at which point we can solve the case. 95 * We can also close the case on EFMD_MOD_CONF (bad config file). 96 */ 97 if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 && 98 nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 && 99 nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) { 100 101 if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) { 102 cp = fmd_case_open(hdl, 103 self_case_create(hdl, SC_MODULE, name)); 104 } 105 106 fmd_case_add_ereport(hdl, cp, ep); 107 self_stats.module.fmds_value.ui64++; 108 (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err); 109 110 if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF) 111 return; /* module is still active, so keep case open */ 112 113 if (fmd_case_solved(hdl, cp)) 114 return; /* case is already closed but error in _fini */ 115 116 class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF; 117 flt = fmd_protocol_fault(class, 100, mod, NULL, NULL, NULL); 118 119 fmd_case_add_suspect(hdl, cp, flt); 120 fmd_case_solve(hdl, cp); 121 122 return; 123 } 124 125 /* 126 * If we get an I/O DDI ereport, drop it for now until the I/O DE is 127 * implemented and integrated. Existing drivers in O/N have bugs that 128 * will trigger these and we don't want this producing FMD_FLT_NOSUB. 129 */ 130 if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0) 131 return; /* if we got a DDI ereport, drop it for now */ 132 133 /* 134 * If we get any other type of event then it is of a class for which 135 * there are no subscribers. Some of these correspond to internal fmd 136 * errors, which we ignore. Otherwise we keep one case per class and 137 * use it to produce a message indicating that something is awry. 138 */ 139 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || 140 strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 || 141 strcmp(class, FM_LIST_UPDATED_CLASS) == 0 || 142 strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 || 143 strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 || 144 strncmp(class, FM_FAULT_CLASS, strlen(FM_FAULT_CLASS)) == 0 || 145 strncmp(class, FM_DEFECT_CLASS, strlen(FM_DEFECT_CLASS)) == 0) 146 return; /* if no agents are present just drop list.* */ 147 148 if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0) 149 return; /* if fmd itself produced the error just drop it */ 150 151 if (strncmp(class, FMD_RSRC_CLASS, FMD_RSRC_CLASS_LEN) == 0) 152 return; /* if fmd itself produced the event just drop it */ 153 154 if (strncmp(class, SYSEVENT_RSRC_CLASS, SYSEVENT_RSRC_CLASS_LEN) == 0) 155 return; /* sysvent resources are auto generated by fmd */ 156 157 if (self_case_lookup(hdl, SC_CLASS, class) != NULL) 158 return; /* case is already open against this class */ 159 160 if (strncmp(class, FM_IREPORT_CLASS ".", 161 sizeof (FM_IREPORT_CLASS)) == 0) 162 return; /* no subscriber required for ireport.* */ 163 164 cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class)); 165 fmd_case_add_ereport(hdl, cp, ep); 166 self_stats.nosub.fmds_value.ui64++; 167 168 flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL, NULL); 169 (void) nvlist_add_string(flt, "nosub_class", class); 170 fmd_case_add_suspect(hdl, cp, flt); 171 fmd_case_solve(hdl, cp); 172 } 173 174 static void 175 self_close(fmd_hdl_t *hdl, fmd_case_t *cp) 176 { 177 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); 178 } 179 180 static const fmd_hdl_ops_t self_ops = { 181 self_recv, /* fmdo_recv */ 182 NULL, /* fmdo_timeout */ 183 self_close, /* fmdo_close */ 184 NULL, /* fmdo_stats */ 185 NULL, /* fmdo_gc */ 186 }; 187 188 void 189 self_init(fmd_hdl_t *hdl) 190 { 191 fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */ 192 193 fmd_hdl_info_t info = { 194 "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL 195 }; 196 197 /* 198 * Unlike other modules, fmd-self-diagnosis has some special needs that 199 * fall outside of what we want in the module API. Manually disable 200 * checkpointing for this module by tweaking the mod_stats values. 201 * The self-diagnosis world relates to fmd's running state and modules 202 * which all change when it restarts, so don't bother w/ checkpointing. 203 */ 204 (void) pthread_mutex_lock(&mp->mod_stats_lock); 205 mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE; 206 mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE; 207 (void) pthread_mutex_unlock(&mp->mod_stats_lock); 208 209 if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0) 210 return; /* failed to register with fmd */ 211 212 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) / 213 sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats); 214 } 215 216 void 217 self_fini(fmd_hdl_t *hdl) 218 { 219 fmd_case_t *cp = NULL; 220 221 while ((cp = fmd_case_next(hdl, cp)) != NULL) 222 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); 223 } 224