1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/fm/protocol.h>
27
28 #include <fmd_api.h>
29 #include <fmd_subr.h>
30 #include <fmd_string.h>
31 #include <fmd_protocol.h>
32 #include <fmd_module.h>
33 #include <fmd_error.h>
34
35 static struct {
36 fmd_stat_t nosub;
37 fmd_stat_t module;
38 } self_stats = {
39 { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" },
40 { "module", FMD_TYPE_UINT64, "error events received from fmd modules" },
41 };
42
/*
 * Case-specific data attached to every case this module opens.  A case is
 * keyed by the pair (sc_kind, sc_name); see self_case_lookup().
 */
typedef struct self_case {
	enum {
		SC_CLASS,	/* sc_name is an event class name */
		SC_MODULE	/* sc_name is an fmd module name */
	} sc_kind;
	char *sc_name;		/* copy made with fmd_hdl_strdup() */
} self_case_t;
47
48 static self_case_t *
self_case_create(fmd_hdl_t * hdl,int kind,const char * name)49 self_case_create(fmd_hdl_t *hdl, int kind, const char *name)
50 {
51 self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP);
52
53 scp->sc_kind = kind;
54 scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP);
55
56 return (scp);
57 }
58
59 static void
self_case_destroy(fmd_hdl_t * hdl,self_case_t * scp)60 self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp)
61 {
62 fmd_hdl_strfree(hdl, scp->sc_name);
63 fmd_hdl_free(hdl, scp, sizeof (self_case_t));
64 }
65
66 static fmd_case_t *
self_case_lookup(fmd_hdl_t * hdl,int kind,const char * name)67 self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name)
68 {
69 fmd_case_t *cp = NULL;
70
71 while ((cp = fmd_case_next(hdl, cp)) != NULL) {
72 self_case_t *scp = fmd_case_getspecific(hdl, cp);
73 if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0)
74 break;
75 }
76
77 return (cp);
78 }
79
80 /*ARGSUSED*/
81 static void
self_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)82 self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
83 {
84 fmd_case_t *cp;
85 nvlist_t *flt, *mod;
86 char *name;
87 int err = 0;
88
89 /*
90 * If we get an error report from another fmd module, then create a
91 * case for the module and add the ereport to it. The error is either
92 * from fmd_hdl_error() or from fmd_api_error(). If it is the latter,
93 * fmd_module_error() will send another event of class EFMD_MOD_FAIL
94 * when the module has failed, at which point we can solve the case.
95 * We can also close the case on EFMD_MOD_CONF (bad config file).
96 */
97 if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 &&
98 nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 &&
99 nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) {
100
101 if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) {
102 cp = fmd_case_open(hdl,
103 self_case_create(hdl, SC_MODULE, name));
104 }
105
106 fmd_case_add_ereport(hdl, cp, ep);
107 self_stats.module.fmds_value.ui64++;
108 (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err);
109
110 if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF)
111 return; /* module is still active, so keep case open */
112
113 if (fmd_case_solved(hdl, cp))
114 return; /* case is already closed but error in _fini */
115
116 class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF;
117 flt = fmd_protocol_fault(class, 100, mod, NULL, NULL, NULL);
118
119 fmd_case_add_suspect(hdl, cp, flt);
120 fmd_case_solve(hdl, cp);
121
122 return;
123 }
124
125 /*
126 * If we get an I/O DDI ereport, drop it for now until the I/O DE is
127 * implemented and integrated. Existing drivers in O/N have bugs that
128 * will trigger these and we don't want this producing FMD_FLT_NOSUB.
129 */
130 if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0)
131 return; /* if we got a DDI ereport, drop it for now */
132
133 /*
134 * If we get any other type of event then it is of a class for which
135 * there are no subscribers. Some of these correspond to internal fmd
136 * errors, which we ignore. Otherwise we keep one case per class and
137 * use it to produce a message indicating that something is awry.
138 */
139 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
140 strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 ||
141 strcmp(class, FM_LIST_UPDATED_CLASS) == 0 ||
142 strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 ||
143 strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
144 strncmp(class, FM_FAULT_CLASS, strlen(FM_FAULT_CLASS)) == 0 ||
145 strncmp(class, FM_DEFECT_CLASS, strlen(FM_DEFECT_CLASS)) == 0)
146 return; /* if no agents are present just drop list.* */
147
148 if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0)
149 return; /* if fmd itself produced the error just drop it */
150
151 if (strncmp(class, FMD_RSRC_CLASS, FMD_RSRC_CLASS_LEN) == 0)
152 return; /* if fmd itself produced the event just drop it */
153
154 if (strncmp(class, SYSEVENT_RSRC_CLASS, SYSEVENT_RSRC_CLASS_LEN) == 0)
155 return; /* sysvent resources are auto generated by fmd */
156
157 if (self_case_lookup(hdl, SC_CLASS, class) != NULL)
158 return; /* case is already open against this class */
159
160 if (strncmp(class, FM_IREPORT_CLASS ".",
161 sizeof (FM_IREPORT_CLASS)) == 0)
162 return; /* no subscriber required for ireport.* */
163
164 cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class));
165 fmd_case_add_ereport(hdl, cp, ep);
166 self_stats.nosub.fmds_value.ui64++;
167
168 flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL, NULL);
169 (void) nvlist_add_string(flt, "nosub_class", class);
170 fmd_case_add_suspect(hdl, cp, flt);
171 fmd_case_solve(hdl, cp);
172 }
173
174 static void
self_close(fmd_hdl_t * hdl,fmd_case_t * cp)175 self_close(fmd_hdl_t *hdl, fmd_case_t *cp)
176 {
177 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp));
178 }
179
180 static const fmd_hdl_ops_t self_ops = {
181 self_recv, /* fmdo_recv */
182 NULL, /* fmdo_timeout */
183 self_close, /* fmdo_close */
184 NULL, /* fmdo_stats */
185 NULL, /* fmdo_gc */
186 };
187
188 void
self_init(fmd_hdl_t * hdl)189 self_init(fmd_hdl_t *hdl)
190 {
191 fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */
192
193 fmd_hdl_info_t info = {
194 "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL
195 };
196
197 /*
198 * Unlike other modules, fmd-self-diagnosis has some special needs that
199 * fall outside of what we want in the module API. Manually disable
200 * checkpointing for this module by tweaking the mod_stats values.
201 * The self-diagnosis world relates to fmd's running state and modules
202 * which all change when it restarts, so don't bother w/ checkpointing.
203 */
204 (void) pthread_mutex_lock(&mp->mod_stats_lock);
205 mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE;
206 mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE;
207 (void) pthread_mutex_unlock(&mp->mod_stats_lock);
208
209 if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0)
210 return; /* failed to register with fmd */
211
212 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) /
213 sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats);
214 }
215
216 void
self_fini(fmd_hdl_t * hdl)217 self_fini(fmd_hdl_t *hdl)
218 {
219 fmd_case_t *cp = NULL;
220
221 while ((cp = fmd_case_next(hdl, cp)) != NULL)
222 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp));
223 }
224