1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/sysevent/eventdefs.h> 31 #include <sys/sysevent.h> 32 #include <sys/sysevent_impl.h> 33 #include <sys/fm/protocol.h> 34 #include <sys/sysmacros.h> 35 #include <sys/dumphdr.h> 36 #include <sys/dumpadm.h> 37 38 #include <libsysevent.h> 39 #include <libnvpair.h> 40 #include <alloca.h> 41 #include <limits.h> 42 #include <strings.h> 43 #include <unistd.h> 44 #include <fcntl.h> 45 #include <errno.h> 46 47 #undef MUTEX_HELD 48 #undef RW_READ_HELD 49 #undef RW_WRITE_HELD 50 51 #include <fmd_api.h> 52 #include <fmd_log.h> 53 #include <fmd_subr.h> 54 #include <fmd_dispq.h> 55 #include <fmd_module.h> 56 #include <fmd_scheme.h> 57 #include <fmd_error.h> 58 59 #include <fmd.h> 60 61 static char *sysev_channel; /* event channel to which we are subscribed */ 62 static char *sysev_class; /* event class to which we are subscribed */ 63 static char *sysev_device; /* device path to use for replaying events */ 64 static char *sysev_sid; /* event channel subscriber identifier */ 65 static void *sysev_evc; /* event channel cookie from evc_bind */ 66 67 static fmd_xprt_t *sysev_xprt; 68 static fmd_hdl_t *sysev_hdl; 69 70 static struct sysev_stats { 71 fmd_stat_t dump_replay; 72 fmd_stat_t dump_lost; 73 fmd_stat_t bad_class; 74 fmd_stat_t bad_attr; 75 fmd_stat_t eagain; 76 } sysev_stats = { 77 { "dump_replay", FMD_TYPE_UINT64, "events replayed from dump device" }, 78 { "dump_lost", FMD_TYPE_UINT64, "events lost from dump device" }, 79 { "bad_class", FMD_TYPE_UINT64, "events dropped due to invalid class" }, 80 { "bad_attr", FMD_TYPE_UINT64, "events dropped due to invalid nvlist" }, 81 { "eagain", FMD_TYPE_UINT64, "events retried due to low memory" }, 82 }; 83 84 /* 85 * Receive an event from the SysEvent channel and post it to our transport. 86 * Under extreme low-memory situations where we cannot event unpack the event, 87 * we can request that SysEvent redeliver the event later by returning EAGAIN. 88 * If we do this too many times, the kernel will drop the event. Rather than 89 * keeping state per-event, we simply attempt a garbage-collect, hoping that 90 * enough free memory will be available by the time the event is redelivered. 91 */ 92 static int 93 sysev_recv(sysevent_t *sep, void *arg) 94 { 95 uint64_t seq = sysevent_get_seq(sep); 96 fmd_xprt_t *xp = arg; 97 nvlist_t *nvl; 98 hrtime_t hrt; 99 100 if (strcmp(sysevent_get_class_name(sep), EC_FM) != 0) { 101 fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: unexpected" 102 " transport class %s\n", seq, sysevent_get_class_name(sep)); 103 sysev_stats.bad_class.fmds_value.ui64++; 104 return (0); 105 } 106 107 if (sysevent_get_attr_list(sep, &nvl) != 0) { 108 if (errno == EAGAIN || errno == ENOMEM) { 109 fmd_modhash_tryapply(fmd.d_mod_hash, fmd_module_trygc); 110 fmd_scheme_hash_trygc(fmd.d_schemes); 111 sysev_stats.eagain.fmds_value.ui64++; 112 return (EAGAIN); 113 } 114 115 fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: missing " 116 "or invalid payload", seq); 117 sysev_stats.bad_attr.fmds_value.ui64++; 118 return (0); 119 } 120 121 sysevent_get_time(sep, &hrt); 122 fmd_xprt_post(sysev_hdl, xp, nvl, hrt); 123 return (0); 124 } 125 126 /* 127 * Checksum algorithm used by the dump transport for verifying the content of 128 * error reports saved on the dump device (copy of the kernel's checksum32()). 129 */ 130 static uint32_t 131 sysev_checksum(void *cp_arg, size_t length) 132 { 133 uchar_t *cp, *ep; 134 uint32_t sum = 0; 135 136 for (cp = cp_arg, ep = cp + length; cp < ep; cp++) 137 sum = ((sum >> 1) | (sum << 31)) + *cp; 138 139 return (sum); 140 } 141 142 /* 143 * Replay saved events from the dump transport. This function is installed as 144 * the timer callback and is called only once during the module's lifetime. 145 */ 146 /*ARGSUSED*/ 147 static void 148 sysev_replay(fmd_hdl_t *hdl, id_t id, void *arg) 149 { 150 char *dumpdev; 151 off64_t off, off0; 152 int fd, err; 153 154 /* 155 * Determine the appropriate dump device to use for replaying pending 156 * error reports. If the device property is NULL (default), we 157 * open and query /dev/dump to determine the current dump device. 158 */ 159 if ((dumpdev = sysev_device) == NULL) { 160 if ((fd = open("/dev/dump", O_RDONLY)) == -1) { 161 fmd_hdl_error(hdl, "failed to open /dev/dump " 162 "to locate dump device for event replay"); 163 return; 164 } 165 166 dumpdev = alloca(PATH_MAX); 167 err = ioctl(fd, DIOCGETDEV, dumpdev); 168 (void) close(fd); 169 170 if (err == -1) { 171 if (errno != ENODEV) { 172 fmd_hdl_error(hdl, "failed to obtain " 173 "path to dump device for event replay"); 174 } 175 return; 176 } 177 } 178 179 if (strcmp(dumpdev, "/dev/null") == 0) 180 return; /* return silently and skip replay for /dev/null */ 181 182 /* 183 * Open the appropriate device and then determine the offset of the 184 * start of the ereport dump region located at the end of the device. 185 */ 186 if ((fd = open64(dumpdev, O_RDWR | O_DSYNC)) == -1) { 187 fmd_hdl_error(hdl, "failed to open dump transport %s " 188 "(pending events will not be replayed)", dumpdev); 189 return; 190 } 191 192 off = DUMP_OFFSET + DUMP_LOGSIZE + DUMP_ERPTSIZE; 193 off = off0 = lseek64(fd, -off, SEEK_END) & -DUMP_OFFSET; 194 195 if (off == (off64_t)-1LL) { 196 fmd_hdl_error(hdl, "failed to seek dump transport %s " 197 "(pending events will not be replayed)", dumpdev); 198 (void) close(fd); 199 return; 200 } 201 202 /* 203 * The ereport dump region is a sequence of erpt_dump_t headers each of 204 * which is followed by packed nvlist data. We iterate over them in 205 * order, unpacking and dispatching each one to our dispatch queue. 206 */ 207 for (;;) { 208 char nvbuf[ERPT_DATA_SZ]; 209 uint32_t chksum; 210 erpt_dump_t ed; 211 nvlist_t *nvl; 212 213 fmd_timeval_t ftv, tod; 214 hrtime_t hrt; 215 uint64_t ena; 216 217 if (pread64(fd, &ed, sizeof (ed), off) != sizeof (ed)) { 218 fmd_hdl_error(hdl, "failed to read from dump " 219 "transport %s (pending events lost)", dumpdev); 220 break; 221 } 222 223 if (ed.ed_magic == 0 && ed.ed_size == 0) 224 break; /* end of list: all zero */ 225 226 if (ed.ed_magic == 0) { 227 off += sizeof (ed) + ed.ed_size; 228 continue; /* continue searching */ 229 } 230 231 if (ed.ed_magic != ERPT_MAGIC) { 232 /* 233 * Stop reading silently if the first record has the 234 * wrong magic number; this likely indicates that we 235 * rebooted from non-FMA bits or paged over the dump. 236 */ 237 if (off == off0) 238 break; 239 240 fmd_hdl_error(hdl, "invalid dump transport " 241 "record at %llx (magic number %x, expected %x)\n", 242 (u_longlong_t)off, ed.ed_magic, ERPT_MAGIC); 243 break; 244 } 245 246 if (ed.ed_size > ERPT_DATA_SZ) { 247 fmd_hdl_error(hdl, "invalid dump transport " 248 "record at %llx size (%u exceeds limit)\n", 249 (u_longlong_t)off, ed.ed_size); 250 break; 251 } 252 253 if (pread64(fd, nvbuf, ed.ed_size, 254 off + sizeof (ed)) != ed.ed_size) { 255 fmd_hdl_error(hdl, "failed to read dump " 256 "transport event (offset %llx)", (u_longlong_t)off); 257 258 sysev_stats.dump_lost.fmds_value.ui64++; 259 goto next; 260 } 261 262 if ((chksum = sysev_checksum(nvbuf, 263 ed.ed_size)) != ed.ed_chksum) { 264 fmd_hdl_error(hdl, "dump transport event at " 265 "offset %llx is corrupt (checksum %x != %x)\n", 266 (u_longlong_t)off, chksum, ed.ed_chksum); 267 268 sysev_stats.dump_lost.fmds_value.ui64++; 269 goto next; 270 } 271 272 if ((err = nvlist_xunpack(nvbuf, 273 ed.ed_size, &nvl, &fmd.d_nva)) != 0) { 274 fmd_hdl_error(hdl, "failed to unpack dump " 275 "transport event at offset %llx: %s\n", 276 (u_longlong_t)off, fmd_strerror(err)); 277 278 sysev_stats.dump_lost.fmds_value.ui64++; 279 goto next; 280 } 281 282 /* 283 * If ed_hrt_nsec is set it contains the gethrtime() value from 284 * when the event was originally enqueued for the transport. 285 * If it is zero, we use the weaker bound ed_hrt_base instead. 286 */ 287 if (ed.ed_hrt_nsec != 0) 288 hrt = ed.ed_hrt_nsec; 289 else 290 hrt = ed.ed_hrt_base; 291 292 /* 293 * If this is an FMA protocol event of class "ereport.*" that 294 * contains valid ENA, we can improve the precision of 'hrt'. 295 */ 296 if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) == 0) 297 hrt = fmd_time_ena2hrt(hrt, ena); 298 299 /* 300 * Now convert 'hrt' to an adjustable TOD based on the values 301 * in ed_tod_base which correspond to one another and are 302 * sampled before reboot using the old gethrtime() clock. 303 * fmd_event_recreate() will use this TOD value to re-assign 304 * the event an updated gethrtime() value based on the current 305 * value of the non-adjustable gethrtime() clock. Phew. 306 */ 307 tod.ftv_sec = ed.ed_tod_base.sec; 308 tod.ftv_nsec = ed.ed_tod_base.nsec; 309 fmd_time_hrt2tod(ed.ed_hrt_base, &tod, hrt, &ftv); 310 311 (void) nvlist_remove_all(nvl, FMD_EVN_TOD); 312 (void) nvlist_add_uint64_array(nvl, 313 FMD_EVN_TOD, (uint64_t *)&ftv, 2); 314 315 fmd_xprt_post(hdl, sysev_xprt, nvl, 0); 316 sysev_stats.dump_replay.fmds_value.ui64++; 317 318 next: 319 /* 320 * Reset the magic number for the event record to zero so that 321 * we do not replay the same event multiple times. 322 */ 323 ed.ed_magic = 0; 324 325 if (pwrite64(fd, &ed, sizeof (ed), off) != sizeof (ed)) { 326 fmd_hdl_error(hdl, "failed to mark dump " 327 "transport event (offset %llx)", (u_longlong_t)off); 328 } 329 330 off += sizeof (ed) + ed.ed_size; 331 } 332 333 (void) close(fd); 334 } 335 336 static const fmd_prop_t sysev_props[] = { 337 { "class", FMD_TYPE_STRING, EC_ALL }, /* event class */ 338 { "device", FMD_TYPE_STRING, NULL }, /* replay device */ 339 { "channel", FMD_TYPE_STRING, FM_ERROR_CHAN }, /* channel name */ 340 { "sid", FMD_TYPE_STRING, "fmd" }, /* subscriber id */ 341 { NULL, 0, NULL } 342 }; 343 344 static const fmd_hdl_ops_t sysev_ops = { 345 NULL, /* fmdo_recv */ 346 sysev_replay, /* fmdo_timeout */ 347 NULL, /* fmdo_close */ 348 NULL, /* fmdo_stats */ 349 NULL, /* fmdo_gc */ 350 NULL, /* fmdo_send */ 351 }; 352 353 static const fmd_hdl_info_t sysev_info = { 354 "SysEvent Transport Agent", "1.0", &sysev_ops, sysev_props 355 }; 356 357 /* 358 * Bind to the sysevent channel we use for listening for error events and then 359 * subscribe to appropriate events received over this channel. 360 */ 361 void 362 sysev_init(fmd_hdl_t *hdl) 363 { 364 uint_t flags; 365 366 if (fmd_hdl_register(hdl, FMD_API_VERSION, &sysev_info) != 0) 367 return; /* invalid property settings */ 368 369 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (sysev_stats) / 370 sizeof (fmd_stat_t), (fmd_stat_t *)&sysev_stats); 371 372 sysev_channel = fmd_prop_get_string(hdl, "channel"); 373 sysev_class = fmd_prop_get_string(hdl, "class"); 374 sysev_device = fmd_prop_get_string(hdl, "device"); 375 sysev_sid = fmd_prop_get_string(hdl, "sid"); 376 377 if (sysev_channel == NULL) 378 fmd_hdl_abort(hdl, "channel property must be defined\n"); 379 380 if (sysev_sid == NULL) 381 fmd_hdl_abort(hdl, "sid property must be defined\n"); 382 383 if ((errno = sysevent_evc_bind(sysev_channel, &sysev_evc, 384 EVCH_CREAT | EVCH_HOLD_PEND)) != 0) { 385 fmd_hdl_abort(hdl, "failed to bind to event transport " 386 "channel %s", sysev_channel); 387 } 388 389 sysev_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL); 390 sysev_hdl = hdl; 391 392 /* 393 * If we're subscribing to the default channel, keep our subscription 394 * active even if we die unexpectedly so we continue queuing events. 395 * If we're not (e.g. running under fmsim), do not specify SUB_KEEP so 396 * that our event channel will be destroyed if we die unpleasantly. 397 */ 398 if (strcmp(sysev_channel, FM_ERROR_CHAN) == 0) 399 flags = EVCH_SUB_KEEP | EVCH_SUB_DUMP; 400 else 401 flags = EVCH_SUB_DUMP; 402 403 errno = sysevent_evc_subscribe(sysev_evc, 404 sysev_sid, sysev_class, sysev_recv, sysev_xprt, flags); 405 406 if (errno != 0) { 407 if (errno == EEXIST) { 408 fmd_hdl_abort(hdl, "another fault management daemon is " 409 "active on transport channel %s\n", sysev_channel); 410 } else { 411 fmd_hdl_abort(hdl, "failed to subscribe to %s on " 412 "transport channel %s", sysev_class, sysev_channel); 413 } 414 } 415 416 /* 417 * Once the transport is open, install a single timer to fire at once 418 * in the context of the module's thread to run sysev_replay(). This 419 * thread will block in its first fmd_xprt_post() until fmd is ready. 420 */ 421 fmd_hdl_debug(hdl, "transport '%s' open\n", sysev_channel); 422 (void) fmd_timer_install(hdl, NULL, NULL, 0); 423 } 424 425 /* 426 * Close the channel by unsubscribing and unbinding. We only do this when a 427 * a non-default channel has been selected. If we're using FM_ERROR_CHAN, 428 * the system default, we do *not* want to unsubscribe because the kernel will 429 * remove the subscriber queue and any events published in our absence will 430 * therefore be lost. This scenario may occur when, for example, fmd is sent 431 * a SIGTERM by init(1M) during reboot but an error is detected and makes it 432 * into the sysevent channel queue before init(1M) manages to call uadmin(2). 433 */ 434 void 435 sysev_fini(fmd_hdl_t *hdl) 436 { 437 if (strcmp(sysev_channel, FM_ERROR_CHAN) != 0) { 438 sysevent_evc_unsubscribe(sysev_evc, sysev_sid); 439 sysevent_evc_unbind(sysev_evc); 440 } 441 442 if (sysev_xprt != NULL) 443 fmd_xprt_close(hdl, sysev_xprt); 444 445 fmd_prop_free_string(hdl, sysev_class); 446 fmd_prop_free_string(hdl, sysev_channel); 447 fmd_prop_free_string(hdl, sysev_device); 448 fmd_prop_free_string(hdl, sysev_sid); 449 } 450