1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _SW_H 27 #define _SW_H 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include <sys/fm/protocol.h> 34 #include <fm/fmd_api.h> 35 #include <libnvpair.h> 36 #include <pthread.h> 37 #include <libuutil.h> 38 39 /* 40 * We have two real fmd modules - software-diagnosis and software-response. 41 * Each hosts a number of subsidiary diagnosis engines and response agents, 42 * although these are not fmd modules as such (the intention is to avoid 43 * a proliferation of small C diagnosis and response modules). 44 * 45 * Subsidiary "modules" are not loaded as normal fmd modules are. Instead 46 * each of the real modules software-diagnosis and software-response includes 47 * an array listing the subsidiaries it hosts, and when the real module 48 * is loaded by fmd it iterates over this list to "load" subsidiaries by 49 * calling their nominated init function. 
50 */ 51 52 /* Maximum number of subsidiary "modules" */ 53 #define SW_SUB_MAX 10 54 55 /* Maximum number of supported timers across all subsidiaries */ 56 #define SW_TIMER_MAX 20 57 58 /* 59 * A subsidiary must perform fmd_hdl_subscribe calls for all events of 60 * interest to it. These are typically performed during its init 61 * function. All subscription callbacks funnel through the shared 62 * fmdo_recv entry point; that function walks through the dispatch list 63 * for each subsidiary and performs a callback for the first matching entry of 64 * each subsidiary. The init entry point for each subsidiary 65 * returns a pointer to an array of struct sw_disp applicable for that 66 * entity. 67 * 68 * Note that the framework does *not* perform any fmd_hdl_subscribe calls 69 * on behalf of the subsidiary - the swd_classpat member below is used 70 * in routing events, not in establishing subscriptions for them. A 71 * subsidiary can subscribe to say "ireport.foo.a" and "ireport.foo.b" 72 * but could elect to nominate a common handler for those via a single 73 * struct sw_disp with swd_classpat of "ireport.foo.*". 74 */ 75 typedef void sw_dispfunc_t(fmd_hdl_t *, fmd_event_t *, nvlist_t *, 76 const char *, void *); 77 78 struct sw_disp { 79 const char *swd_classpat; /* event classes to callback for */ 80 sw_dispfunc_t *swd_func; /* callback function */ 81 void *swd_arg; /* opaque argument to callback */ 82 }; 83 84 /* 85 * A diagnosis or response subsidiary must provide a struct sw_subinfo with 86 * all its pertinent information; a pointer to this structure must be 87 * included in the array of struct sw_subinfo pointers in each of 88 * software-diagnosis and software-response. 89 * 90 * swsub_name 91 * This should be chosen to be unique to this subsidiary; 92 * by convention it should also be the name prefix used in any fmd 93 * buffers the subsidiary creates. 
*
 * swsub_casetype
 *	A diagnosis subsidiary solves cases using swde_case_* below, and it
 *	must specify in swsub_casetype the type of case it solves.  A response
 *	subsidiary must specify SW_CASE_NONE here.  A subsidiary may solve
 *	at most one type of case, and no two subsidiaries may solve the same
 *	case type.  We use the case type to associate a subsidiary owner with
 *	the fmd case that is really owned by the host module.
 *
 * swsub_init
 *	The initialization function for this subsidiary, akin to the
 *	_fmd_init in a traditional fmd module.  This must not be NULL.
 *
 *	When the host diagnosis/response module initializes, the _fmd_init
 *	entry point will call the swsub_init function for each subsidiary
 *	in turn.  The fmd handle has already been registered and timers are
 *	available for installation (see below).  The swsub_init function
 *	returns one of the SW_SUB_INIT_* values; it must also hand back,
 *	through the const struct sw_disp ** argument, a pointer to a
 *	NULL-terminated array of struct sw_disp describing the event dispatch
 *	preferences for that subsidiary, and fill the integer we pass with
 *	the number of entries in that array (including the terminating NULL
 *	entry).  The swsub_init function also receives a subsidiary-unique
 *	id_t assigned by the framework that it should keep a note of for use
 *	in timer installation (see below); this id should not be persisted
 *	to checkpoint data.
 *
 * swsub_fini
 *	When the host module _fmd_fini is called it will call this function
 *	for each subsidiary.  A subsidiary can specify NULL here.
 *
 * swsub_timeout
 *	This is the timeout function to call for expired timers installed by
 *	this subsidiary.  See sw_timer_{install,remove} below.  May be
 *	NULL if no timers are used by this subsidiary.
 *
 * swsub_case_close
 *	This function is called when a case "owned" by a subsidiary
 *	is the subject of an fmdo_close callback.
Can be NULL, and 131 * must be NULL for a subsidiary with case type SW_CASE_NONE (such 132 * as a response subsidiary). 133 * 134 * swsub_case_verify 135 * This is called during _fmd_init of the host module. The host module 136 * iterates over all cases that it owns and calls the verify function 137 * for the real owner which may choose to close cases if they no longer 138 * apply. Can be NULL, and must be NULL for a subsidiary with case 139 * type SW_CASE_NONE. 140 */ 141 142 /* 143 * sw_casetype values are persisted to checkpoints - do not change values. 144 */ 145 enum sw_casetype { 146 SW_CASE_NONE = 0x0ca5e000, 147 SW_CASE_SMF, 148 SW_CASE_PANIC 149 }; 150 151 /* 152 * Returns for swsub_init. The swsub_fini entry point will only be 153 * called for subsidiaries that returned SW_SUB_INIT_SUCCESS on init. 154 */ 155 #define SW_SUB_INIT_SUCCESS 0 156 #define SW_SUB_INIT_FAIL_VOLUNTARY 1 /* chose not to init */ 157 #define SW_SUB_INIT_FAIL_ERROR 2 /* error prevented init */ 158 159 typedef void swsub_case_close_func_t(fmd_hdl_t *, fmd_case_t *); 160 typedef int sw_case_vrfy_func_t(fmd_hdl_t *, fmd_case_t *); 161 162 struct sw_subinfo { 163 const char *swsub_name; 164 enum sw_casetype swsub_casetype; 165 int (*swsub_init)(fmd_hdl_t *, id_t, const struct sw_disp **, int *); 166 void (*swsub_fini)(fmd_hdl_t *); 167 void (*swsub_timeout)(fmd_hdl_t *, id_t, void *); 168 swsub_case_close_func_t *swsub_case_close; 169 sw_case_vrfy_func_t *swsub_case_verify; 170 }; 171 172 /* 173 * List sw_subinfo for each subsidiary diagnosis and response "module" here 174 */ 175 extern const struct sw_subinfo smf_diag_info; 176 extern const struct sw_subinfo smf_response_info; 177 extern const struct sw_subinfo panic_diag_info; 178 179 /* 180 * Timers - as per the fmd module API but with an additional id_t argument 181 * specifying the unique id of the subsidiary installing the timer (provided 182 * to the subsidiary in its swsub_init call). 
183 */ 184 extern id_t sw_timer_install(fmd_hdl_t *, id_t, void *, fmd_event_t *, 185 hrtime_t); 186 extern void sw_timer_remove(fmd_hdl_t *, id_t, id_t); 187 188 /* 189 * The software-diagnosis subsidiaries can open and solve cases; to do so 190 * they must use the following wrappers to the usual fmd module API case 191 * management functions. We need this so that a subsidiary can iterate 192 * over *its* cases (fmd_case_next would iterate over those of other 193 * subsidiaries), receive in the subsidiary a callback when a case it opened 194 * is closed, etc. The subsidiary can use other fmd module API members 195 * for case management, such as fmd_case_add_ereport. 196 * 197 * Each subsidiary opens cases of its own unique type, identified by 198 * the sw_casetype enumeration. The values used in this enumeration 199 * must never change - they are written to checkpoint state. 200 * 201 * swde_case_open 202 * Opens a new case of the correct subsidiary type for the given 203 * subsidiary id. If a uuid string is provided then open a case 204 * with that uuid using fmd_case_open_uuid, allowing case uuid 205 * to match some relevant uuid that was received in one of the 206 * events that has led us to open this case. 207 * 208 * If the subsidiarywishes to associate some persistent 209 * case data with the new case thenit can fmd_hdl_alloc and complete a 210 * suitably-packed serialization structure and include a pointer to it 211 * in the call to sw_case_open together with the structure size and 212 * structure version. The framework will create a new fmd buffer (named 213 * for you, based on the case type) and write the structure out to disk; 214 * when the module or fmd is restarted this structure is restored from 215 * disk for you and reassociated with the case - use swde_case_data to 216 * retrieve a pointer to it. 217 * 218 * swde_case_first, swde_case_next 219 * A subsidiary DE can iterate over its cases using swde_case_first and 220 * swde_case_next. 
For swde_case_first quote the subsidiary id; 221 * for swde_case_next quote the last case returned. 222 * 223 * swde_case_data 224 * Returns a pointer to the previously-serialized case data, and fills 225 * a uint32_t with the version of that serialized data. 226 * 227 * swde_case_data_write 228 * Whenever a subsidiary modifies its persistent data structure 229 * it must call swde_case_data_write to indicate that the associated 230 * fmd buffer is dirty and needs to be rewritten. 231 * 232 * swde_case_data_upgrade 233 * If the subsidiary ever revs its persistent structure it needs to call 234 * swde_case_data_upgrade to register the new version and structure size, 235 * and write the structure out to a reallocated fmd buffer; the old 236 * case data structure (if any) will be freed. A subsidiary may use 237 * this interface to migrate old persistence structures restored from 238 * checkpoint - swde_case_data will return a version number below the 239 * current. 240 */ 241 242 extern fmd_case_t *swde_case_open(fmd_hdl_t *, id_t, char *, uint32_t, 243 void *, size_t); 244 extern fmd_case_t *swde_case_first(fmd_hdl_t *, id_t); 245 extern fmd_case_t *swde_case_next(fmd_hdl_t *, fmd_case_t *); 246 extern void *swde_case_data(fmd_hdl_t *, fmd_case_t *, uint32_t *); 247 extern void swde_case_data_write(fmd_hdl_t *, fmd_case_t *); 248 extern void swde_case_data_upgrade(fmd_hdl_t *, fmd_case_t *, uint32_t, 249 void *, size_t); 250 251 #ifdef __cplusplus 252 } 253 #endif 254 255 #endif /* _SW_H */ 256