1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * /dev/bmc IPMI monitor 31 * 32 * The purpose of this module is to monitor the connection between the system 33 * and the service processor attached via /dev/bmc. The module assumes the SP 34 * supports the Sun OEM uptime IPMI command. If the BMC connection does not 35 * exist, or the uptime function is not implemented, then the module unloads 36 * without doing anything. 37 * 38 * When the module is first loaded, or a reset is detected, the module will 39 * generate the ESC_PLATFORM_SP_RESET sysevent as a system-wide notification to 40 * indicate that this event has occurred. 41 * 42 * Note that this event generation is not guaranteed to have a one-to-one 43 * correspondence with an SP reset. There is no persistence, so if fmd is 44 * restarted we will generate this event again. Thus the event only indicates 45 * the possibility that the SP has been reset. This could be enhanced using fmd 46 * checkpoints to have some persistent state to avoid this scenario. However, 47 * it currently serves the useful dual purpose of notifying consumers of system 48 * startup as well as SP reset through a single channel. 49 */ 50 51 #include <errno.h> 52 #include <libipmi.h> 53 #include <libsysevent.h> 54 #include <string.h> 55 #include <fm/fmd_api.h> 56 #include <sys/sysevent/eventdefs.h> 57 58 typedef struct sp_monitor { 59 ipmi_handle_t *sm_hdl; 60 uint32_t sm_seconds; 61 uint32_t sm_generation; 62 hrtime_t sm_interval; 63 } sp_monitor_t; 64 65 static void 66 sp_post_sysevent(fmd_hdl_t *hdl) 67 { 68 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 69 sysevent_id_t eid; 70 71 fmd_hdl_debug(hdl, "SP reset detected, posting sysevent"); 72 73 if (sysevent_post_event(EC_PLATFORM, ESC_PLATFORM_SP_RESET, 74 SUNW_VENDOR, "fmd", NULL, &eid) != 0) { 75 fmd_hdl_debug(hdl, "failed to send sysevent: %s", 76 strerror(errno)); 77 /* 78 * We reset the seconds and generation so that the next time 79 * through we will try to post the sysevent again. 80 */ 81 smp->sm_seconds = -1U; 82 smp->sm_generation = -1U; 83 } 84 } 85 86 /*ARGSUSED*/ 87 static void 88 sp_timeout(fmd_hdl_t *hdl, id_t id, void *data) 89 { 90 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 91 uint32_t seconds, generation; 92 93 if (ipmi_sunoem_uptime(smp->sm_hdl, &seconds, &generation) != 0) { 94 /* 95 * Ignore uptime failures. We will generate the appropriate 96 * event when it comes back online. 97 */ 98 fmd_hdl_debug(hdl, "failed to get uptime: %s", 99 ipmi_errmsg(smp->sm_hdl)); 100 } else { 101 /* 102 * We want to catch cases where the generation number is 103 * explicitly reset, or when the SP configuration is reset after 104 * a reboot (and the generation number is 0). We also post a 105 * sysevent when the module initially loads, since we can't be 106 * sure if we missed a SP reset or not. 107 */ 108 if (seconds < smp->sm_seconds || 109 generation != smp->sm_generation || 110 smp->sm_seconds == 0) 111 sp_post_sysevent(hdl); 112 113 smp->sm_seconds = seconds; 114 smp->sm_generation = generation; 115 } 116 117 (void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval); 118 } 119 120 static const fmd_hdl_ops_t fmd_ops = { 121 NULL, /* fmdo_recv */ 122 sp_timeout, /* fmdo_timeout */ 123 NULL, /* fmdo_close */ 124 NULL, /* fmdo_stats */ 125 NULL, /* fmdo_gc */ 126 }; 127 128 static const fmd_prop_t fmd_props[] = { 129 { "interval", FMD_TYPE_TIME, "60sec" }, 130 { NULL, 0, NULL } 131 }; 132 133 static const fmd_hdl_info_t fmd_info = { 134 "Service Processor Monitor", "1.0", &fmd_ops, fmd_props 135 }; 136 137 void 138 _fmd_init(fmd_hdl_t *hdl) 139 { 140 sp_monitor_t *smp; 141 int error; 142 char *msg; 143 144 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) 145 return; 146 147 smp = fmd_hdl_zalloc(hdl, sizeof (sp_monitor_t), FMD_SLEEP); 148 fmd_hdl_setspecific(hdl, smp); 149 150 if ((smp->sm_hdl = ipmi_open(&error, &msg)) == NULL) { 151 /* 152 * If /dev/bmc doesn't exist on the system, then unload the 153 * module without doing anything. 154 */ 155 if (error != EIPMI_BMC_OPEN_FAILED) 156 fmd_hdl_abort(hdl, "failed to initialize IPMI " 157 "connection: %s\n", msg); 158 fmd_hdl_debug(hdl, "failed to load: no IPMI connection " 159 "present"); 160 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 161 fmd_hdl_unregister(hdl); 162 return; 163 } 164 165 /* 166 * Attempt an initial uptime() call. If the IPMI command is 167 * unrecognized, then this is an unsupported platform and the module 168 * should be unloaded. Any other error is treated is transient failure. 169 */ 170 if ((error = ipmi_sunoem_uptime(smp->sm_hdl, &smp->sm_seconds, 171 &smp->sm_generation)) != 0 && 172 ipmi_errno(smp->sm_hdl) == EIPMI_INVALID_COMMAND) { 173 fmd_hdl_debug(hdl, "failed to load: uptime command " 174 "not supported"); 175 ipmi_close(smp->sm_hdl); 176 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 177 fmd_hdl_unregister(hdl); 178 return; 179 } 180 181 smp->sm_interval = fmd_prop_get_int64(hdl, "interval"); 182 183 if (error == 0) 184 fmd_hdl_debug(hdl, "successfully loaded, uptime = %u seconds " 185 "(generation %u)", smp->sm_seconds, smp->sm_generation); 186 else 187 fmd_hdl_debug(hdl, "successfully loaded, but uptime call " 188 "failed: %s", ipmi_errmsg(smp->sm_hdl)); 189 190 /* 191 * Setup the recurring timer. 192 */ 193 (void) fmd_timer_install(hdl, NULL, NULL, 0); 194 } 195 196 void 197 _fmd_fini(fmd_hdl_t *hdl) 198 { 199 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 200 201 if (smp) { 202 ipmi_close(smp->sm_hdl); 203 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 204 } 205 } 206