1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2012 Joyent, Inc. All rights reserved. 26 */ 27 28 /* 29 * /dev/ipmi IPMI monitor 30 * 31 * The purpose of this module is to monitor the connection between the system 32 * and the service processor attached via /dev/ipmi0. The module assumes the SP 33 * supports the Sun OEM uptime IPMI command. If the BMC connection does not 34 * exist, or the uptime function is not implemented, then the module unloads 35 * without doing anything. 36 * 37 * When the module is first loaded, or a reset is detected, the module will 38 * generate the ESC_PLATFORM_SP_RESET sysevent as a system-wide notification to 39 * indicate that this event has occurred. 40 * 41 * Note that this event generation is not guaranteed to have a one-to-one 42 * correspondence with an SP reset. There is no persistence, so if fmd is 43 * restarted we will generate this event again. Thus the event only indicates 44 * the possibility that the SP has been reset. This could be enhanced using fmd 45 * checkpoints to have some persistent state to avoid this scenario. However, 46 * it currently serves the useful dual purpose of notifying consumers of system 47 * startup as well as SP reset through a single channel. 48 */ 49 50 #include <errno.h> 51 #include <libipmi.h> 52 #include <libsysevent.h> 53 #include <string.h> 54 #include <fm/fmd_api.h> 55 #include <sys/sysevent/eventdefs.h> 56 57 typedef struct sp_monitor { 58 ipmi_handle_t *sm_hdl; 59 uint32_t sm_seconds; 60 uint32_t sm_generation; 61 hrtime_t sm_interval; 62 } sp_monitor_t; 63 64 static void 65 sp_post_sysevent(fmd_hdl_t *hdl) 66 { 67 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 68 sysevent_id_t eid; 69 70 fmd_hdl_debug(hdl, "SP reset detected, posting sysevent"); 71 72 if (sysevent_post_event(EC_PLATFORM, ESC_PLATFORM_SP_RESET, 73 SUNW_VENDOR, "fmd", NULL, &eid) != 0) { 74 fmd_hdl_debug(hdl, "failed to send sysevent: %s", 75 strerror(errno)); 76 /* 77 * We reset the seconds and generation so that the next time 78 * through we will try to post the sysevent again. 79 */ 80 smp->sm_seconds = -1U; 81 smp->sm_generation = -1U; 82 } 83 } 84 85 /*ARGSUSED*/ 86 static void 87 sp_timeout(fmd_hdl_t *hdl, id_t id, void *data) 88 { 89 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 90 uint32_t seconds, generation; 91 92 if (ipmi_sunoem_uptime(smp->sm_hdl, &seconds, &generation) != 0) { 93 /* 94 * Ignore uptime failures. We will generate the appropriate 95 * event when it comes back online. 96 */ 97 fmd_hdl_debug(hdl, "failed to get uptime: %s", 98 ipmi_errmsg(smp->sm_hdl)); 99 } else { 100 /* 101 * We want to catch cases where the generation number is 102 * explicitly reset, or when the SP configuration is reset after 103 * a reboot (and the generation number is 0). We also post a 104 * sysevent when the module initially loads, since we can't be 105 * sure if we missed a SP reset or not. 106 */ 107 if (seconds < smp->sm_seconds || 108 generation != smp->sm_generation || 109 smp->sm_seconds == 0) 110 sp_post_sysevent(hdl); 111 112 smp->sm_seconds = seconds; 113 smp->sm_generation = generation; 114 } 115 116 (void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval); 117 } 118 119 static const fmd_hdl_ops_t fmd_ops = { 120 NULL, /* fmdo_recv */ 121 sp_timeout, /* fmdo_timeout */ 122 NULL, /* fmdo_close */ 123 NULL, /* fmdo_stats */ 124 NULL, /* fmdo_gc */ 125 }; 126 127 static const fmd_prop_t fmd_props[] = { 128 { "interval", FMD_TYPE_TIME, "60sec" }, 129 { NULL, 0, NULL } 130 }; 131 132 static const fmd_hdl_info_t fmd_info = { 133 "Service Processor Monitor", "1.0", &fmd_ops, fmd_props 134 }; 135 136 void 137 _fmd_init(fmd_hdl_t *hdl) 138 { 139 sp_monitor_t *smp; 140 int error; 141 char *msg; 142 143 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) 144 return; 145 146 smp = fmd_hdl_zalloc(hdl, sizeof (sp_monitor_t), FMD_SLEEP); 147 fmd_hdl_setspecific(hdl, smp); 148 149 if ((smp->sm_hdl = ipmi_open(&error, &msg, IPMI_TRANSPORT_BMC, NULL)) 150 == NULL) { 151 /* 152 * If /dev/ipmi0 doesn't exist on the system, then unload the 153 * module without doing anything. 154 */ 155 if (error != EIPMI_BMC_OPEN_FAILED) 156 fmd_hdl_abort(hdl, "failed to initialize IPMI " 157 "connection: %s\n", msg); 158 fmd_hdl_debug(hdl, "failed to load: no IPMI connection " 159 "present"); 160 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 161 fmd_hdl_unregister(hdl); 162 return; 163 } 164 165 /* 166 * Attempt an initial uptime() call. If the IPMI command is 167 * unrecognized, then this is an unsupported platform and the module 168 * should be unloaded. Any other error is treated is transient failure. 169 */ 170 if ((error = ipmi_sunoem_uptime(smp->sm_hdl, &smp->sm_seconds, 171 &smp->sm_generation)) != 0 && 172 ipmi_errno(smp->sm_hdl) == EIPMI_INVALID_COMMAND) { 173 fmd_hdl_debug(hdl, "failed to load: uptime command " 174 "not supported"); 175 ipmi_close(smp->sm_hdl); 176 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 177 fmd_hdl_unregister(hdl); 178 return; 179 } 180 181 smp->sm_interval = fmd_prop_get_int64(hdl, "interval"); 182 183 if (error == 0) 184 fmd_hdl_debug(hdl, "successfully loaded, uptime = %u seconds " 185 "(generation %u)", smp->sm_seconds, smp->sm_generation); 186 else 187 fmd_hdl_debug(hdl, "successfully loaded, but uptime call " 188 "failed: %s", ipmi_errmsg(smp->sm_hdl)); 189 190 /* 191 * Setup the recurring timer. 192 */ 193 (void) fmd_timer_install(hdl, NULL, NULL, 0); 194 } 195 196 void 197 _fmd_fini(fmd_hdl_t *hdl) 198 { 199 sp_monitor_t *smp = fmd_hdl_getspecific(hdl); 200 201 if (smp) { 202 ipmi_close(smp->sm_hdl); 203 fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t)); 204 } 205 } 206