xref: /titanic_50/usr/src/cmd/fm/modules/common/sw-diag-response/common/sw.h (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #ifndef	_SW_H
27 #define	_SW_H
28 
29 #ifdef	__cplusplus
30 extern "C" {
31 #endif
32 
33 #include <sys/fm/protocol.h>
34 #include <fm/fmd_api.h>
35 #include <libnvpair.h>
36 #include <pthread.h>
37 #include <libuutil.h>
38 
39 /*
40  * We have two real fmd modules - software-diagnosis and software-response.
41  * Each hosts a number of subsidiary diagnosis engines and response agents,
42  * although these are not fmd modules as such (the intention is to avoid
43  * a proliferation of small C diagnosis and response modules).
44  *
45  * Subsidiary "modules" are not loaded as normal fmd modules are.  Instead
46  * each of the real modules software-diagnosis and software-response includes
47  * an array listing the subsidiaries it hosts, and when the real module
48  * is loaded by fmd it iterates over this list to "load" subsidiaries by
49  * calling their nominated init function.
50  */
51 
52 /* Maximum number of subsidiary "modules" */
53 #define	SW_SUB_MAX	10
54 
55 /* Maximum number of supported timers across all subsidiaries */
56 #define	SW_TIMER_MAX	20
57 
58 /*
59  * A subsidiary must perform fmd_hdl_subscribe calls for all events of
60  * interest to it.  These are typically performed during its init
61  * function.  All subscription callbacks funnel through the shared
62  * fmdo_recv entry point; that function walks through the dispatch list
63  * for each subsidiary and performs a callback for the first matching entry of
64  * each subsidiary.  The init entry point for each subsidiary
65  * returns a pointer to an array of struct sw_disp applicable for that
66  * entity.
67  *
68  * Note that the framework does *not* perform any fmd_hdl_subscribe calls
69  * on behalf of the subsidiary - the swd_classpat member below is used
70  * in routing events, not in establishing subscriptions for them.  A
71  * subsidiary can subscribe to say "ireport.foo.a" and "ireport.foo.b"
72  * but could elect to nominate a common handler for those via a single
73  * struct sw_disp with swd_classpat of "ireport.foo.*".
74  */
75 typedef void sw_dispfunc_t(fmd_hdl_t *, fmd_event_t *, nvlist_t *,	/* type of sw_disp.swd_func */
76     const char *, void *);	/* NOTE(review): trailing void * is presumably swd_arg - confirm */
77 
78 struct sw_disp {	/* one event-routing entry; it does not establish a subscription */
79 	const char *swd_classpat;	/* class pattern used to route events to swd_func */
80 	sw_dispfunc_t *swd_func;	/* callback function */
81 	void *swd_arg;			/* opaque argument to callback */
82 };
83 
84 /*
85  * A diagnosis or response subsidiary must provide a struct sw_subinfo with
86  * all its pertinent information;  a pointer to this structure must be
87  * included in the array of struct sw_subinfo pointers in each of
88  * software-diagnosis and software-response.
89  *
90  * swsub_name
91  *	This should be chosen to be unique to this subsidiary;
92  *	by convention it should also be the name prefix used in any fmd
93  *	buffers the subsidiary creates.
94  *
95  * swsub_casetype
96  *	A diagnosis subsidiary solves cases using swde_case_* below, and it
97  *	must specify in swsub_casetype the type of case it solves.  A response
98  *	subsidiary must specify SW_CASE_NONE here.  A subsidiary may solve
99  *	at most one type of case, and no two subsidiaries may solve the same
100  *	case type.  We use the case type to associate a subsidiary owner with
101  *	the fmd case that is really owned by the host module.
102  *
103  * swsub_init
104  *	The initialization function for this subsidiary, akin to the
105  *	_fmd_init in a traditional fmd module.  This must not be NULL.
106  *
107  *	 When the host diagnosis/response module initializes, its _fmd_init
108  *	 entry point will call the swsub_init function for each subsidiary
109  *	 in turn.  The fmd handle has already been registered and timers are
110  *	 available for installation (see below);  the swsub_init function must
111  *	 return a pointer to a NULL-terminated array of struct sw_disp
112  *	 describing the event dispatch preferences for that module, and fill
113  *	 an integer we pass with the number of entries in that array (including
114  *	 the terminating NULL entry).  The swsub_init function also receives
115  *	 a subsidiary-unique id_t assigned by the framework that it should
116  *	 keep a note of for use in timer installation (see below);  this id
117  *	 should not be persisted to checkpoint data.
118  *
119  * swsub_fini
120  *	When the host module _fmd_fini is called it will call this function
121  *	for each subsidiary.  A subsidiary can specify NULL here.
122  *
123  * swsub_timeout
124  *	This is the timeout function to call for expired timers installed by
125  *	this subsidiary.  See sw_timer_{install,remove} below.  May be
126  *	NULL if no timers are used by this subsidiary.
127  *
128  * swsub_case_close
129  *	This function is called when a case "owned" by a subsidiary
130  *	is the subject of an fmdo_close callback.  Can be NULL, and
131  *	must be NULL for a subsidiary with case type SW_CASE_NONE (such
132  *	as a response subsidiary).
133  *
134  * swsub_case_verify
135  *	This is called during _fmd_init of the host module.  The host module
136  *	iterates over all cases that it owns and calls the verify function
137  *	for the real owner which may choose to close cases if they no longer
138  *	apply.  Can be NULL, and must be NULL for a subsidiary with case
139  *	type SW_CASE_NONE.
140  */
141 
142 /*
143  * sw_casetype values are persisted to checkpoints - do not change values.
144  */
145 enum sw_casetype {	/* append new types last so existing persisted values stay fixed */
146 	SW_CASE_NONE = 0x0ca5e000,	/* no case type; required for response subsidiaries */
147 	SW_CASE_SMF,			/* implicitly 0x0ca5e001 */
148 	SW_CASE_PANIC			/* implicitly 0x0ca5e002 */
149 };
150 
151 /*
152  * Returns for swsub_init.  The swsub_fini entry point will only be
153  * called for subsidiaries that returned SW_SUB_INIT_SUCCESS on init.
154  */
155 #define	SW_SUB_INIT_SUCCESS		0	/* only case where swsub_fini is called */
156 #define	SW_SUB_INIT_FAIL_VOLUNTARY	1	/* chose not to init */
157 #define	SW_SUB_INIT_FAIL_ERROR		2	/* error prevented init */
158 
159 typedef void swsub_case_close_func_t(fmd_hdl_t *, fmd_case_t *);	/* type of swsub_case_close */
160 typedef int sw_case_vrfy_func_t(fmd_hdl_t *, fmd_case_t *);	/* type of swsub_case_verify; NOTE(review): meaning of the int return is not stated here - confirm */
161 
162 struct sw_subinfo {	/* per-subsidiary descriptor listed by the host module */
163 	const char *swsub_name;	/* unique name; by convention also the fmd buffer name prefix */
164 	enum sw_casetype swsub_casetype;	/* case type solved; SW_CASE_NONE for a response subsidiary */
165 	int (*swsub_init)(fmd_hdl_t *, id_t, const struct sw_disp **, int *);	/* required; returns SW_SUB_INIT_* */
166 	void (*swsub_fini)(fmd_hdl_t *);	/* may be NULL; called from host _fmd_fini */
167 	void (*swsub_timeout)(fmd_hdl_t *, id_t, void *);	/* may be NULL if no timers are used */
168 	swsub_case_close_func_t *swsub_case_close;	/* may be NULL; must be NULL for SW_CASE_NONE */
169 	sw_case_vrfy_func_t *swsub_case_verify;	/* may be NULL; must be NULL for SW_CASE_NONE */
170 };
171 
172 /*
173  * List sw_subinfo for each subsidiary diagnosis and response "module" here
174  */
175 extern const struct sw_subinfo smf_diag_info;	/* defined outside this header */
176 extern const struct sw_subinfo smf_response_info;	/* NOTE(review): presumably a response subsidiary (SW_CASE_NONE) - confirm */
177 extern const struct sw_subinfo panic_diag_info;	/* defined outside this header */
178 
179 /*
180  * Timers - as per the fmd module API but with an additional id_t argument
181  * specifying the unique id of the subsidiary installing the timer (provided
182  * to the subsidiary in its swsub_init call).
183  */
184 extern id_t sw_timer_install(fmd_hdl_t *, id_t, void *, fmd_event_t *,	/* id_t argument: installing subsidiary's id */
185     hrtime_t);
186 extern void sw_timer_remove(fmd_hdl_t *, id_t, id_t);	/* NOTE(review): presumably (subsidiary id, timer id) - confirm order */
187 
188 /*
189  * The software-diagnosis subsidiaries can open and solve cases; to do so
190  * they must use the following wrappers to the usual fmd module API case
191  * management functions.  We need this so that a subsidiary can iterate
192  * over *its* cases (fmd_case_next would iterate over those of other
193  * subsidiaries), receive in the subsidiary a callback when a case it opened
194  * is closed, etc.  The subsidiary can use other fmd module API members
195  * for case management, such as fmd_case_add_ereport.
196  *
197  * Each subsidiary opens cases of its own unique type, identified by
198  * the sw_casetype enumeration.  The values used in this enumeration
199  * must never change - they are written to checkpoint state.
200  *
201  * swde_case_open
202  *	Opens a new case of the correct subsidiary type for the given
203  *	subsidiary id.  If a uuid string is provided then open a case
204  *	with that uuid using fmd_case_open_uuid, allowing case uuid
205  *	to match some relevant uuid that was received in one of the
206  *	events that has led us to open this case.
207  *
208  *	If the subsidiary wishes to associate some persistent
209  *	case data with the new case then it can fmd_hdl_alloc and complete a
210  *	suitably-packed serialization structure and include a pointer to it
211  *	in the call to swde_case_open together with the structure size and
212  *	structure version.  The framework will create a new fmd buffer (named
213  *	for you, based on the case type) and write the structure out to disk;
214  *	when the module or fmd is restarted this structure is restored from
215  *	disk for you and reassociated with the case - use swde_case_data to
216  *	retrieve a pointer to it.
217  *
218  * swde_case_first, swde_case_next
219  *	A subsidiary DE can iterate over its cases using swde_case_first and
220  *	swde_case_next.  For swde_case_first quote the subsidiary id;
221  *	for swde_case_next quote the last case returned.
222  *
223  * swde_case_data
224  *	Returns a pointer to the previously-serialized case data, and fills
225  *	a uint32_t with the version of that serialized data.
226  *
227  * swde_case_data_write
228  *	Whenever a subsidiary modifies its persistent data structure
229  *	it must call swde_case_data_write to indicate that the associated
230  *	fmd buffer is dirty and needs to be rewritten.
231  *
232  * swde_case_data_upgrade
233  *	If the subsidiary ever revs its persistent structure it needs to call
234  *	swde_case_data_upgrade to register the new version and structure size,
235  *	and write the structure out to a reallocated fmd buffer;  the old
236  *	case data structure (if any) will be freed.  A subsidiary may use
237  *	this interface to migrate old persistence structures restored from
238  *	checkpoint - swde_case_data will return a version number below the
239  *	current.
240  */
241 
242 extern fmd_case_t *swde_case_open(fmd_hdl_t *, id_t, char *, uint32_t,	/* id_t: subsidiary id; char *: optional case uuid */
243     void *, size_t);	/* NOTE(review): presumably data version, data pointer, data size - confirm */
244 extern fmd_case_t *swde_case_first(fmd_hdl_t *, id_t);	/* id_t: subsidiary id */
245 extern fmd_case_t *swde_case_next(fmd_hdl_t *, fmd_case_t *);	/* case argument: last case returned */
246 extern void *swde_case_data(fmd_hdl_t *, fmd_case_t *, uint32_t *);	/* fills the uint32_t with the data version */
247 extern void swde_case_data_write(fmd_hdl_t *, fmd_case_t *);	/* call after modifying the persistent data */
248 extern void swde_case_data_upgrade(fmd_hdl_t *, fmd_case_t *, uint32_t,	/* registers a new version and structure size */
249     void *, size_t);
250 
251 #ifdef	__cplusplus
252 }
253 #endif
254 
255 #endif	/* _SW_H */
256