1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * /dev/bmc IPMI monitor 29 * 30 * The purpose of this module is to monitor the connection between the system 31 * and the service processor attached via /dev/bmc. The module assumes the SP 32 * supports the Sun OEM uptime IPMI command. If the BMC connection does not 33 * exist, or the uptime function is not implemented, then the module unloads 34 * without doing anything. 35 * 36 * When the module is first loaded, or a reset is detected, the module will 37 * generate the ESC_PLATFORM_SP_RESET sysevent as a system-wide notification to 38 * indicate that this event has occurred. 39 * 40 * Note that this event generation is not guaranteed to have a one-to-one 41 * correspondence with an SP reset. There is no persistence, so if fmd is 42 * restarted we will generate this event again. Thus the event only indicates 43 * the possibility that the SP has been reset. This could be enhanced using fmd 44 * checkpoints to have some persistent state to avoid this scenario. However, 45 * it currently serves the useful dual purpose of notifying consumers of system 46 * startup as well as SP reset through a single channel. 47 */ 48 49 #include <errno.h> 50 #include <libipmi.h> 51 #include <libsysevent.h> 52 #include <string.h> 53 #include <fm/fmd_api.h> 54 #include <sys/sysevent/eventdefs.h> 55 56 typedef struct sp_monitor { 57 ipmi_handle_t *sm_hdl; 58 uint32_t sm_seconds; 59 uint32_t sm_generation; 60 hrtime_t sm_interval; 61 } sp_monitor_t; 62 63 static void 64 sp_post_sysevent(fmd_hdl_t *hdl) 65 { 66 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 67 sysevent_id_t eid; 68 69 fmd_hdl_debug(hdl, "SP reset detected, posting sysevent"); 70 71 if (sysevent_post_event(EC_PLATFORM, ESC_PLATFORM_SP_RESET, 72 SUNW_VENDOR, "fmd", NULL, &eid) != 0) { 73 fmd_hdl_debug(hdl, "failed to send sysevent: %s", 74 strerror(errno)); 75 /* 76 * We reset the seconds and generation so that the next time 77 * through we will try to post the sysevent again. 78 */ 79 smp->sm_seconds = -1U; 80 smp->sm_generation = -1U; 81 } 82 } 83 84 /*ARGSUSED*/ 85 static void 86 sp_timeout(fmd_hdl_t *hdl, id_t id, void *data) 87 { 88 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 89 uint32_t seconds, generation; 90 91 if (ipmi_sunoem_uptime(smp->sm_hdl, &seconds, &generation) != 0) { 92 /* 93 * Ignore uptime failures. We will generate the appropriate 94 * event when it comes back online. 95 */ 96 fmd_hdl_debug(hdl, "failed to get uptime: %s", 97 ipmi_errmsg(smp->sm_hdl)); 98 } else { 99 /* 100 * We want to catch cases where the generation number is 101 * explicitly reset, or when the SP configuration is reset after 102 * a reboot (and the generation number is 0). We also post a 103 * sysevent when the module initially loads, since we can't be 104 * sure if we missed a SP reset or not. 105 */ 106 if (seconds < smp->sm_seconds || 107 generation != smp->sm_generation || 108 smp->sm_seconds == 0) 109 sp_post_sysevent(hdl); 110 111 smp->sm_seconds = seconds; 112 smp->sm_generation = generation; 113 } 114 115 (void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval); 116 } 117 118 static const fmd_hdl_ops_t fmd_ops = { 119 NULL, /* fmdo_recv */ 120 sp_timeout, /* fmdo_timeout */ 121 NULL, /* fmdo_close */ 122 NULL, /* fmdo_stats */ 123 NULL, /* fmdo_gc */ 124 }; 125 126 static const fmd_prop_t fmd_props[] = { 127 { "interval", FMD_TYPE_TIME, "60sec" }, 128 { NULL, 0, NULL } 129 }; 130 131 static const fmd_hdl_info_t fmd_info = { 132 "Service Processor Monitor", "1.0", &fmd_ops, fmd_props 133 }; 134 135 void 136 _fmd_init(fmd_hdl_t *hdl) 137 { 138 sp_monitor_t *smp; 139 int error; 140 char *msg; 141 142 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) 143 return; 144 145 smp = fmd_hdl_zalloc(hdl, sizeof (sp_monitor_t), FMD_SLEEP); 146 fmd_hdl_setspecific(hdl, smp); 147 148 if ((smp->sm_hdl = ipmi_open(&error, &msg, IPMI_TRANSPORT_BMC, NULL)) 149 == NULL) { 150 /* 151 * If /dev/bmc doesn't exist on the system, then unload the 152 * module without doing anything. 153 */ 154 if (error != EIPMI_BMC_OPEN_FAILED) 155 fmd_hdl_abort(hdl, "failed to initialize IPMI " 156 "connection: %s\n", msg); 157 fmd_hdl_debug(hdl, "failed to load: no IPMI connection " 158 "present"); 159 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 160 fmd_hdl_unregister(hdl); 161 return; 162 } 163 164 /* 165 * Attempt an initial uptime() call. If the IPMI command is 166 * unrecognized, then this is an unsupported platform and the module 167 * should be unloaded. Any other error is treated is transient failure. 168 */ 169 if ((error = ipmi_sunoem_uptime(smp->sm_hdl, &smp->sm_seconds, 170 &smp->sm_generation)) != 0 && 171 ipmi_errno(smp->sm_hdl) == EIPMI_INVALID_COMMAND) { 172 fmd_hdl_debug(hdl, "failed to load: uptime command " 173 "not supported"); 174 ipmi_close(smp->sm_hdl); 175 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 176 fmd_hdl_unregister(hdl); 177 return; 178 } 179 180 smp->sm_interval = fmd_prop_get_int64(hdl, "interval"); 181 182 if (error == 0) 183 fmd_hdl_debug(hdl, "successfully loaded, uptime = %u seconds " 184 "(generation %u)", smp->sm_seconds, smp->sm_generation); 185 else 186 fmd_hdl_debug(hdl, "successfully loaded, but uptime call " 187 "failed: %s", ipmi_errmsg(smp->sm_hdl)); 188 189 /* 190 * Setup the recurring timer. 191 */ 192 (void) fmd_timer_install(hdl, NULL, NULL, 0); 193 } 194 195 void 196 _fmd_fini(fmd_hdl_t *hdl) 197 { 198 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 199 200 if (smp) { 201 ipmi_close(smp->sm_hdl); 202 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 203 } 204 } 205