xref: /titanic_50/usr/src/cmd/fm/modules/common/sw-diag-response/common/sw_main_cmn.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Code shared by software-diagnosis and software-response modules.
28  * The fmd module linkage info for the two modules lives in swde_main.c
29  * (for software-diagnosis) and swrp_main.c (for software-response).
30  */
31 
32 #include "../common/sw_impl.h"
33 
/*
 * Every hosted subsidiary is handed a unique subsidiary id.  An id is the
 * subsidiary's table index placed in bits 16-23, tagged with 0x1d in the
 * top byte and with all remaining bits clear.  ID2IDX and IDX2ID convert
 * between an id and the index used to keep track of subsidiaries;
 * SUBIDVALID checks that an id has the expected tag and that its index is
 * below the module's swms_dispcnt.  Outside of this file these ids should
 * remain opaque.
 */
#define	ID2IDX(subid)	((int)((subid) & 0xff0000) >> 16)
#define	IDX2ID(idx)	((id_t)((idx) << 16) | 0x1d000000)

#define	SUBIDVALID(ms, subid)	\
	(((int)(subid) & 0xff00ffff) == 0x1d000000 && \
	ID2IDX(subid) < (ms)->swms_dispcnt)
45 
46 static struct {
47 	fmd_stat_t sw_recv_total;
48 	fmd_stat_t sw_recv_match;
49 	fmd_stat_t sw_recv_callback;
50 } sw_stats = {
51 	{ "sw_recv_total", FMD_TYPE_UINT64,
52 	    "total events received" },
53 	{ "sw_recv_match", FMD_TYPE_UINT64,
54 	    "events matching some subsidiary" },
55 	{ "sw_recv_callback", FMD_TYPE_UINT64,
56 	    "callbacks to all subsidiaries" },
57 };
58 
59 #define	BUMPSTAT(stat)		sw_stats.stat.fmds_value.ui64++
60 #define	BUMPSTATN(stat, n)	sw_stats.stat.fmds_value.ui64 += (n)
61 
62 /*
63  * ========================== Event Receipt =================================
64  *
65  * The fmdo_recv entry point.  See which sub de/response agents have a
66  * matching subscription and callback for the first match from each.
67  * The sub de/response agents should dispatch *all* their subscriptions
68  * via their registered dispatch table, including things like list.repaired.
69  */
70 void
sw_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)71 sw_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
72 {
73 	struct sw_modspecific *msinfo;
74 	int calls = 0;
75 	int mod;
76 
77 	BUMPSTAT(sw_recv_total);
78 
79 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
80 
81 	/*
82 	 * For each sub module that has a matching class pattern call the
83 	 * registered callback for that sub DE.  Only one match per sub module
84 	 * is allowed (the first match in its table, others are not checked).
85 	 */
86 	for (mod = 0; mod < msinfo->swms_dispcnt; mod++) {
87 		const struct sw_disp *dp;
88 		sw_dispfunc_t *dispf = NULL;
89 
90 		for (dp = (*msinfo->swms_disptbl)[mod];
91 		    dp != NULL && dp->swd_classpat != NULL; dp++) {
92 			if (fmd_nvl_class_match(hdl, nvl, dp->swd_classpat)) {
93 				dispf = dp->swd_func;
94 				break;
95 			}
96 		}
97 		if (dispf != NULL) {
98 			calls++;
99 			(*dispf)(hdl, ep, nvl, class, dp->swd_arg);
100 		}
101 	}
102 
103 	BUMPSTAT(sw_recv_match);
104 	if (calls)
105 		BUMPSTATN(sw_recv_callback, calls);
106 }
107 
108 /*
109  * ========================== Timers ========================================
110  *
111  * A subsidiary can install a timer; it must pass an additional argument
112  * identifying itself so that we can hand off to the appropriate
113  * swsub_timeout function in the fmdo_timeout entry point when the timer fires.
114  */
115 id_t
sw_timer_install(fmd_hdl_t * hdl,id_t who,void * arg,fmd_event_t * ep,hrtime_t hrt)116 sw_timer_install(fmd_hdl_t *hdl, id_t who, void *arg, fmd_event_t *ep,
117     hrtime_t hrt)
118 {
119 	struct sw_modspecific *msinfo;
120 	const struct sw_subinfo **subinfo;
121 	const struct sw_subinfo *sip;
122 	int slot, chosen = -1;
123 	id_t timerid;
124 
125 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
126 	if (!SUBIDVALID(msinfo, who))
127 		fmd_hdl_abort(hdl, "sw_timer_install: invalid subid %d\n", who);
128 
129 	subinfo = *msinfo->swms_subinfo;
130 	sip = subinfo[ID2IDX(who)];
131 
132 	if (sip-> swsub_timeout == NULL)
133 		fmd_hdl_abort(hdl, "sw_timer_install: no swsub_timeout\n");
134 
135 	/*
136 	 * Look for a slot.  Module entry points are single-threaded
137 	 * in nature, but if someone installs a timer from a door
138 	 * service function we're contended.
139 	 */
140 	(void) pthread_mutex_lock(&msinfo->swms_timerlock);
141 	for (slot = 0; slot < SW_TIMER_MAX; slot++) {
142 		if (msinfo->swms_timers[slot].swt_state != SW_TMR_INUSE) {
143 			chosen = slot;
144 			break;
145 		}
146 	}
147 
148 	if (chosen == -1)
149 		fmd_hdl_abort(hdl, "timer slots exhausted\n");
150 
151 	msinfo->swms_timers[chosen].swt_state = SW_TMR_INUSE;
152 	msinfo->swms_timers[chosen].swt_ownerid = who;
153 	msinfo->swms_timers[chosen].swt_timerid = timerid =
154 	    fmd_timer_install(hdl, arg, ep, hrt);
155 
156 	(void) pthread_mutex_unlock(&msinfo->swms_timerlock);
157 
158 	return (timerid);
159 }
160 
161 /*
162  * Look for a timer installed by a given subsidiary matching timerid.
163  */
164 static int
subtimer_find(struct sw_modspecific * msinfo,id_t who,id_t timerid)165 subtimer_find(struct sw_modspecific *msinfo, id_t who, id_t timerid)
166 {
167 	int slot;
168 
169 	for (slot = 0; slot < SW_TIMER_MAX; slot++) {
170 		if (msinfo->swms_timers[slot].swt_state == SW_TMR_INUSE &&
171 		    (who == -1 ||
172 		    msinfo->swms_timers[slot].swt_ownerid == who) &&
173 		    msinfo->swms_timers[slot].swt_timerid == timerid)
174 			return (slot);
175 	}
176 
177 	return (-1);
178 }
179 
180 void
sw_timer_remove(fmd_hdl_t * hdl,id_t who,id_t timerid)181 sw_timer_remove(fmd_hdl_t *hdl, id_t who, id_t timerid)
182 {
183 	struct sw_modspecific *msinfo;
184 	const struct sw_subinfo **subinfo;
185 	const struct sw_subinfo *sip;
186 	int slot;
187 
188 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
189 	if (!SUBIDVALID(msinfo, who))
190 		fmd_hdl_abort(hdl, "sw_timer_remove: invalid subid\n");
191 
192 	subinfo = *msinfo->swms_subinfo;
193 	sip = subinfo[ID2IDX(who)];
194 
195 	(void) pthread_mutex_lock(&msinfo->swms_timerlock);
196 	if ((slot = subtimer_find(msinfo, who, timerid)) == -1)
197 		fmd_hdl_abort(hdl, "sw_timer_remove: timerid %d not found "
198 		    "for %s\n", timerid, sip->swsub_name);
199 	fmd_timer_remove(hdl, timerid);
200 	msinfo->swms_timers[slot].swt_state = SW_TMR_RMVD;
201 	(void) pthread_mutex_unlock(&msinfo->swms_timerlock);
202 }
203 
204 /*
205  * The fmdo_timeout entry point.
206  */
207 void
sw_timeout(fmd_hdl_t * hdl,id_t timerid,void * arg)208 sw_timeout(fmd_hdl_t *hdl, id_t timerid, void *arg)
209 {
210 	struct sw_modspecific *msinfo;
211 	const struct sw_subinfo **subinfo;
212 	const struct sw_subinfo *sip;
213 	id_t owner;
214 	int slot;
215 
216 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
217 
218 	(void) pthread_mutex_lock(&msinfo->swms_timerlock);
219 	if ((slot = subtimer_find(msinfo, -1, timerid)) == -1)
220 		fmd_hdl_abort(hdl, "sw_timeout: timerid %d not found\n");
221 	(void) pthread_mutex_unlock(&msinfo->swms_timerlock);
222 
223 	owner = msinfo->swms_timers[slot].swt_ownerid;
224 	if (!SUBIDVALID(msinfo, owner))
225 		fmd_hdl_abort(hdl, "sw_timeout: invalid subid\n");
226 
227 	subinfo = *msinfo->swms_subinfo;
228 	sip = subinfo[ID2IDX(owner)];
229 
230 	sip->swsub_timeout(hdl, timerid, arg);
231 }
232 
233 /*
234  * ========================== sw_subinfo access =============================
235  */
236 
237 enum sw_casetype
sw_id_to_casetype(fmd_hdl_t * hdl,id_t who)238 sw_id_to_casetype(fmd_hdl_t *hdl, id_t who)
239 {
240 	struct sw_modspecific *msinfo;
241 	const struct sw_subinfo **subinfo;
242 	const struct sw_subinfo *sip;
243 
244 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
245 	if (!SUBIDVALID(msinfo, who))
246 		fmd_hdl_abort(hdl, "sw_id_to_casetype: invalid subid %d\n",
247 		    who);
248 
249 	subinfo = *msinfo->swms_subinfo;
250 	sip = subinfo[ID2IDX(who)];
251 
252 	if ((sip->swsub_casetype & SW_CASE_NONE) != SW_CASE_NONE)
253 		fmd_hdl_abort(hdl, "sw_id_to_casetype: bad case type %d "
254 		    "for %s\n", sip->swsub_casetype, sip->swsub_name);
255 
256 	return (sip->swsub_casetype);
257 }
258 
259 /*
260  * Given a case type lookup the struct sw_subinfo for the subsidiary
261  * that opens cases of that type.
262  */
263 static const struct sw_subinfo *
sw_subinfo_bycase(fmd_hdl_t * hdl,enum sw_casetype type)264 sw_subinfo_bycase(fmd_hdl_t *hdl, enum sw_casetype type)
265 {
266 	struct sw_modspecific *msinfo;
267 	const struct sw_subinfo **subinfo;
268 	const struct sw_subinfo *sip;
269 	int i;
270 
271 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
272 
273 	subinfo = *msinfo->swms_subinfo;
274 	for (i = 0; i < SW_SUB_MAX; i++) {
275 		sip = subinfo[i];
276 		if (sip->swsub_casetype == type)
277 			return (sip);
278 	}
279 
280 	return (NULL);
281 }
282 
283 /*
284  * Find the case close function for the given case type; can be NULL.
285  */
286 swsub_case_close_func_t *
sw_sub_case_close_func(fmd_hdl_t * hdl,enum sw_casetype type)287 sw_sub_case_close_func(fmd_hdl_t *hdl, enum sw_casetype type)
288 {
289 	const struct sw_subinfo *sip;
290 
291 	if ((sip = sw_subinfo_bycase(hdl, type)) == NULL)
292 		fmd_hdl_abort(hdl, "sw_sub_case_close_func: case type "
293 		    "%d not found\n", type);
294 
295 	return (sip->swsub_case_close);
296 }
297 
298 /*
299  * Find the case verify function for the given case type; can be NULL.
300  */
301 sw_case_vrfy_func_t *
sw_sub_case_vrfy_func(fmd_hdl_t * hdl,enum sw_casetype type)302 sw_sub_case_vrfy_func(fmd_hdl_t *hdl, enum sw_casetype type)
303 {
304 	const struct sw_subinfo *sip;
305 
306 	if ((sip = sw_subinfo_bycase(hdl, type)) == NULL)
307 		fmd_hdl_abort(hdl, "sw_sub_case_vrfy_func: case type "
308 		    "%d not found\n", type);
309 
310 	return (sip->swsub_case_verify);
311 }
312 
313 /*
314  * ========================== Initialization ================================
315  *
316  * The two modules - software-diagnosis and software-response - call
317  * sw_fmd_init from their _fmd_init entry points.
318  */
319 
320 static void
sw_add_callbacks(fmd_hdl_t * hdl,const char * who,const struct sw_disp * dp,int nelem,struct sw_modspecific * msinfo)321 sw_add_callbacks(fmd_hdl_t *hdl, const char *who,
322     const struct sw_disp *dp, int nelem, struct sw_modspecific *msinfo)
323 {
324 	int i;
325 
326 	(*msinfo->swms_disptbl)[msinfo->swms_dispcnt++] = dp;
327 
328 	if (dp == NULL)
329 		return;		/* subsidiary failed init */
330 
331 	/* check that the nelem'th entry is the NULL termination */
332 	if (dp[nelem - 1].swd_classpat != NULL ||
333 	    dp[nelem - 1].swd_func != NULL || dp[nelem - 1].swd_arg != NULL)
334 		fmd_hdl_abort(hdl, "subsidiary %s dispatch table not NULL-"
335 		    "terminated\n", who);
336 
337 	/* now validate the entries; we allow NULL handlers */
338 	for (i = 0; i < nelem - 1; i++) {
339 		if (dp[i].swd_classpat == NULL)
340 			fmd_hdl_abort(hdl, "subsidiary %s dispatch table entry "
341 			    "%d has a NULL pattern or function\n", who, i);
342 	}
343 
344 }
345 
346 int
sw_fmd_init(fmd_hdl_t * hdl,const fmd_hdl_info_t * hdlinfo,const struct sw_subinfo * (* subsid)[SW_SUB_MAX])347 sw_fmd_init(fmd_hdl_t *hdl, const fmd_hdl_info_t *hdlinfo,
348     const struct sw_subinfo *(*subsid)[SW_SUB_MAX])
349 {
350 	struct sw_modspecific *msinfo;
351 	int i;
352 
353 	if (fmd_hdl_register(hdl, FMD_API_VERSION, hdlinfo) != 0)
354 		return (0);
355 
356 	if (fmd_prop_get_int32(hdl, "enable") != B_TRUE) {
357 		fmd_hdl_debug(hdl, "%s disabled though .conf file setting\n",
358 		    hdlinfo->fmdi_desc);
359 		fmd_hdl_unregister(hdl);
360 		return (0);
361 	}
362 
363 	msinfo = fmd_hdl_zalloc(hdl, sizeof (*msinfo), FMD_SLEEP);
364 
365 	msinfo->swms_subinfo = subsid;
366 	msinfo->swms_disptbl = fmd_hdl_zalloc(hdl,
367 	    SW_SUB_MAX * sizeof (struct sw_disp *), FMD_SLEEP);
368 
369 	(void) pthread_mutex_init(&msinfo->swms_timerlock, NULL);
370 
371 	for (i = 0; i < SW_TIMER_MAX; i++)
372 		msinfo->swms_timers[i].swt_state = SW_TMR_UNTOUCHED;
373 
374 	fmd_hdl_setspecific(hdl, (void *)msinfo);
375 
376 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (sw_stats) /
377 	    sizeof (fmd_stat_t), (fmd_stat_t *)&sw_stats);
378 
379 	/*
380 	 * Initialize subsidiaries.  Each must make any subscription
381 	 * requests it needs and return a pointer to a NULL-terminated
382 	 * callback dispatch table and an indication of the number of
383 	 * entries in that table including the NULL termination entry.
384 	 */
385 	for (i = 0; i < SW_SUB_MAX; i++) {
386 		const struct sw_subinfo *sip = (*subsid)[i];
387 		const struct sw_disp *dp;
388 		char dbgbuf[80];
389 		int nelem = -1;
390 		int initrslt;
391 
392 		if (!sip || sip->swsub_name == NULL)
393 			break;
394 
395 		initrslt = (*sip->swsub_init)(hdl, IDX2ID(i), &dp, &nelem);
396 
397 		(void) snprintf(dbgbuf, sizeof (dbgbuf),
398 		    "subsidiary %d (id 0x%lx) '%s'",
399 		    i, IDX2ID(i), sip->swsub_name);
400 
401 		switch (initrslt) {
402 		case SW_SUB_INIT_SUCCESS:
403 			if (dp == NULL || nelem < 1)
404 				fmd_hdl_abort(hdl, "%s returned dispatch "
405 				    "table 0x%p and nelem %d\n",
406 				    dbgbuf, dp, nelem);
407 
408 			fmd_hdl_debug(hdl, "%s initialized\n", dbgbuf);
409 			sw_add_callbacks(hdl, sip->swsub_name, dp, nelem,
410 			    msinfo);
411 			break;
412 
413 		case SW_SUB_INIT_FAIL_VOLUNTARY:
414 			fmd_hdl_debug(hdl, "%s chose not to initialize\n",
415 			    dbgbuf);
416 			sw_add_callbacks(hdl, sip->swsub_name, NULL, -1,
417 			    msinfo);
418 			break;
419 
420 		case SW_SUB_INIT_FAIL_ERROR:
421 			fmd_hdl_debug(hdl, "%s failed to initialize "
422 			    "because of an error\n", dbgbuf);
423 			sw_add_callbacks(hdl, sip->swsub_name, NULL, -1,
424 			    msinfo);
425 			break;
426 
427 		default:
428 			fmd_hdl_abort(hdl, "%s returned out-of-range result "
429 			    "%d\n", dbgbuf, initrslt);
430 			break;
431 		}
432 	}
433 
434 	return (1);
435 }
436 
437 void
sw_fmd_fini(fmd_hdl_t * hdl)438 sw_fmd_fini(fmd_hdl_t *hdl)
439 {
440 	const struct sw_subinfo **subinfo;
441 	struct sw_modspecific *msinfo;
442 	int i;
443 
444 	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);
445 	subinfo = *msinfo->swms_subinfo;
446 
447 	(void) pthread_mutex_lock(&msinfo->swms_timerlock);
448 	for (i = 0; i < SW_TIMER_MAX; i++) {
449 		if (msinfo->swms_timers[i].swt_state != SW_TMR_INUSE)
450 			continue;
451 
452 		fmd_timer_remove(hdl, msinfo->swms_timers[i].swt_timerid);
453 		msinfo->swms_timers[i].swt_state = SW_TMR_RMVD;
454 	}
455 	(void) pthread_mutex_unlock(&msinfo->swms_timerlock);
456 
457 	(void) pthread_mutex_destroy(&msinfo->swms_timerlock);
458 
459 	for (i = 0; i < msinfo->swms_dispcnt; i++) {
460 		const struct sw_subinfo *sip = subinfo[i];
461 
462 		if ((*msinfo->swms_disptbl)[i] == NULL)
463 			continue;	/* swsub_init did not succeed */
464 
465 		if (sip->swsub_fini != NULL)
466 			(*sip->swsub_fini)(hdl);
467 	}
468 
469 	fmd_hdl_free(hdl, msinfo->swms_disptbl,
470 	    SW_SUB_MAX * sizeof (struct sw_disp *));
471 
472 	fmd_hdl_setspecific(hdl, NULL);
473 	fmd_hdl_free(hdl, msinfo, sizeof (*msinfo));
474 }
475