/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * FMD Log File Subsystem
 *
 * Events are written to one of two log files as they are received or created;
 * the error log tracks all ereport.* events received on the inbound event
 * transport, and the fault log tracks all list.* events generated by fmd or
 * its client modules. In addition, we use the same log file format to cache
 * state and events associated with ASRUs that are named in a diagnosis.
 *
 * The log files use the exacct format manipulated by libexacct(3LIB) and
 * originally defined in PSARC 1999/119. However, the exacct library was
 * designed primarily for read-only clients and without the synchronous i/o
 * considerations and seeking required for fmd, so we use libexacct here only
 * to read and write the file headers and to pack data from memory into a file
 * bytestream. All of the i/o and file offset manipulations are performed by
 * the fmd code below. Our exacct file management uses the following grammar:
 *
 * file := hdr toc event*
 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER
 *	EXD_FMA_PLAT EXD_FMA_UUID
 * toc := EXD_FMA_OFFSET
 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* or legacy evref
 * evref := EXD_FMA_UUID EXD_FMA_OFFSET
 * legacy evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET
 *
 * Any event can be uniquely identified by the tuple (file, offset) where file
 * is encoded as (uuid) when we are cross-linking files. For legacy file
 * formats we still support encoding the reference as (major, minor, inode).
 * Note that we break out the file's dev_t into its two 32-bit components to
 * permit development of either 32-bit or 64-bit log readers and writers; the
 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way
 * for a 32-bit application to retrieve and store a 64-bit dev_t.
 *
 * In order to replay events in the event of an fmd crash, events are initially
 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the
 * fmd_log_append() function. Later, once an event transitions from the
 * received state to one of its other states (see fmd_event.c for details),
 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating
 * that the event is fully processed and no longer needs to be replayed.
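 *
 * As an informal illustration only (this is not code fmd itself uses), an
 * external consumer could walk one of these logs with the documented
 * libexacct(3LIB) interfaces, reading the hdr and toc groups and then one
 * group per event. A rough sketch, with error handling omitted and with an
 * example path and creator string that must match the installed
 * configuration ("log.creator"):
 *
 *	ea_file_t ef;
 *	ea_object_t *grp;
 *	const char *creator = "...";	(whatever "log.creator" is set to)
 *
 *	if (ea_open(&ef, "/var/fm/fmd/errlog", creator,
 *	    EO_VALID_HDR | EO_HEAD, O_RDONLY, 0) == 0) {
 *		while ((grp = ea_get_object_tree(&ef, 1)) != NULL)
 *			ea_free_object(grp, EUP_ALLOC);	(process, then free)
 *		(void) ea_close(&ef);
 *	}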
 */

#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/statvfs.h>
#include <sys/fm/protocol.h>
#include <sys/exacct_impl.h>
#include <uuid/uuid.h>

#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <ctype.h>

#include <fmd_alloc.h>
#include <fmd_error.h>
#include <fmd_string.h>
#include <fmd_event.h>
#include <fmd_conf.h>
#include <fmd_subr.h>
#include <fmd_case.h>
#include <fmd_log.h>

#include <fmd.h>

#define	CAT_FMA_RGROUP	(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA)
#define	CAT_FMA_GROUP	(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA)

#define	CAT_FMA_LABEL	(EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL)
#define	CAT_FMA_VERSION	(EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION)
#define	CAT_FMA_OSREL	(EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL)
#define	CAT_FMA_OSVER	(EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER)
#define	CAT_FMA_PLAT	(EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT)
#define	CAT_FMA_UUID	(EXT_STRING | EXC_DEFAULT | EXD_FMA_UUID)
#define	CAT_FMA_TODSEC	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC)
#define	CAT_FMA_TODNSEC	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC)
#define	CAT_FMA_NVLIST	(EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST)
#define	CAT_FMA_MAJOR	(EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR)
#define	CAT_FMA_MINOR	(EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR)
#define	CAT_FMA_INODE	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE)
#define	CAT_FMA_OFFSET	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET)

static ssize_t
fmd_log_write(fmd_log_t *lp, const void *buf, size_t n)
{
	ssize_t resid = n;
	ssize_t len;

	ASSERT(MUTEX_HELD(&lp->log_lock));

	while (resid != 0) {
		if ((len = write(lp->log_fd, buf, resid)) <= 0)
			break;

		resid -= len;
		buf = (char *)buf + len;
	}

	if (resid == n && n != 0)
		return (-1);

	return (n - resid);
}

static int
fmd_log_write_hdr(fmd_log_t *lp, const char *tag)
{
	ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5, i6;
	const char *osrel, *osver, *plat;
	off64_t off = 0;
	int err = 0;
	uuid_t uuid;

	(void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel);
	(void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver);
	(void) fmd_conf_getprop(fmd.d_conf, "platform", &plat);
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &lp->log_uuidlen);

	lp->log_uuid = fmd_zalloc(lp->log_uuidlen + 1, FMD_SLEEP);
	uuid_generate(uuid);
	uuid_unparse(uuid, lp->log_uuid);

	err |= ea_set_group(&hdr, CAT_FMA_GROUP);
	err |= ea_set_group(&toc, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0);
	err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0);
	err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0);
	err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0);
	err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0);
	err |= ea_set_item(&i5, CAT_FMA_UUID, lp->log_uuid, 0);
	err |= ea_set_item(&i6, CAT_FMA_OFFSET, &off, 0);

	(void) ea_attach_to_group(&hdr, &i0);
	(void) ea_attach_to_group(&hdr, &i1);
	(void) ea_attach_to_group(&hdr, &i2);
	(void) ea_attach_to_group(&hdr, &i3);
	(void) ea_attach_to_group(&hdr, &i4);
	(void) ea_attach_to_group(&hdr, &i5);
	(void) ea_attach_to_group(&toc, &i6);

	if (err == 0) {
		size_t hdr_size = ea_pack_object(&hdr, NULL, 0);
		size_t toc_size = ea_pack_object(&toc, NULL, 0);

		size_t size = hdr_size + toc_size;
		void *buf = fmd_alloc(size, FMD_SLEEP);

		(void) ea_pack_object(&hdr, buf, hdr_size);
		(void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size);

		if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L)
			fmd_panic("failed to seek log %s", lp->log_name);

		if (fmd_log_write(lp, buf, size) != size)
			err = errno; /* save errno for fmd_set_errno() below */

		fmd_free(buf, size);

		lp->log_toc = lp->log_off + hdr_size;
		lp->log_beg = lp->log_off + hdr_size + toc_size;
		lp->log_off = lp->log_off + hdr_size + toc_size;

		if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END))
			fmd_panic("eof off != log_off 0x%llx\n", lp->log_off);
	} else
		err = EFMD_LOG_EXACCT;

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);
	(void) ea_free_item(&i3, EUP_ALLOC);
	(void) ea_free_item(&i4, EUP_ALLOC);
	(void) ea_free_item(&i5, EUP_ALLOC);
	(void) ea_free_item(&i6, EUP_ALLOC);

	return (err ? fmd_set_errno(err) : 0);
}

static int
fmd_log_check_err(fmd_log_t *lp, int err, const char *msg)
{
	int eaerr = ea_error();
	char buf[BUFSIZ];

	(void) snprintf(buf, sizeof (buf), "%s: %s: %s\n",
	    lp->log_name, msg, eaerr != EXR_OK ?
	    fmd_ea_strerror(eaerr) : "catalog tag mismatch");

	fmd_error(err, buf);
	return (fmd_set_errno(err));
}

static int
fmd_log_check_hdr(fmd_log_t *lp, const char *tag)
{
	int got_version = 0, got_label = 0;
	ea_object_t *grp, *obj;
	off64_t hdr_off, hdr_size;
	int dvers, fvers;
	const char *p;

	ea_clear(&lp->log_ea); /* resync exacct file */

	if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L)
		fmd_panic("failed to seek log %s", lp->log_name);

	/*
	 * Read the first group of log meta-data: the write-once read-only
	 * file header. We read all records in this group, ignoring all but
	 * the VERSION and LABEL, which are required and must be verified.
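	 * Only the leading integer portion of each version string is
	 * compared below; for example (purely illustrative), a log written
	 * by daemon version "2.0" (fvers = 2) would be rejected by a daemon
	 * whose fmd.d_version is "1.2" (dvers = 1), since fvers > dvers.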
	 */
	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
	    grp->eo_catalog != CAT_FMA_GROUP) {
		ea_free_object(grp, EUP_ALLOC);
		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
		    "invalid fma hdr record group"));
	}

	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
		switch (obj->eo_catalog) {
		case CAT_FMA_VERSION:
			for (dvers = 0, p = fmd.d_version;
			    *p != '\0'; p++) {
				if (isdigit(*p))
					dvers = dvers * 10 + (*p - '0');
				else
					break;
			}

			for (fvers = 0, p = obj->eo_item.ei_string;
			    *p != '\0'; p++) {
				if (isdigit(*p))
					fvers = fvers * 10 + (*p - '0');
				else
					break;
			}

			if (fvers > dvers) {
				fmd_error(EFMD_LOG_INVAL, "%s: log version "
				    "%s is not supported by this daemon\n",
				    lp->log_name, obj->eo_item.ei_string);
				ea_free_object(grp, EUP_ALLOC);
				return (fmd_set_errno(EFMD_LOG_VERSION));
			}

			got_version++;
			break;

		case CAT_FMA_LABEL:
			if (strcmp(obj->eo_item.ei_string, tag) != 0) {
				fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' "
				    "does not match expected tag '%s'\n",
				    lp->log_name, obj->eo_item.ei_string, tag);
				ea_free_object(grp, EUP_ALLOC);
				return (fmd_set_errno(EFMD_LOG_INVAL));
			}
			got_label++;
			break;
		case CAT_FMA_UUID:
			lp->log_uuid = fmd_strdup(obj->eo_item.ei_string,
			    FMD_SLEEP);
			lp->log_uuidlen = strlen(lp->log_uuid);
			break;
		}
	}

	hdr_size = ea_pack_object(grp, NULL, 0);
	ea_free_object(grp, EUP_ALLOC);

	if (!got_version || !got_label) {
		fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not "
		    "include mandatory version and/or label\n", lp->log_name);
		return (fmd_set_errno(EFMD_LOG_INVAL));
	}

	/*
	 * Read the second group of log meta-data: the table of contents. We
	 * expect this group to contain an OFFSET object indicating the current
	 * value of log_skip. We save this in our fmd_log_t and then return.
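	 * For example, if every event up to byte offset 0x1000 has already
	 * been committed, fmd_log_update() will have rewritten this OFFSET
	 * object to 0x1000, and a later fmd_log_replay() will skip directly
	 * to that offset rather than rescanning from log_beg.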
	 */
	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
	    grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 ||
	    grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) {
		ea_free_object(grp, EUP_ALLOC);
		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
		    "invalid fma toc record group"));
	}

	lp->log_toc = hdr_off + hdr_size;
	lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0);
	lp->log_off = lseek64(lp->log_fd, 0, SEEK_END);
	lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64;

	if (lp->log_skip > lp->log_off) {
		fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; "
		    "resetting to zero\n", lp->log_name, lp->log_skip);
		lp->log_skip = 0;
	}

	ea_free_object(grp, EUP_ALLOC);
	return (0);
}

static int
fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags)
{
	int fd = dup(lp->log_fd);
	const char *creator;

	(void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator);

	if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) {
		fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n",
		    lp->log_name, fmd_ea_strerror(ea_error()));
		(void) close(fd);
		return (fmd_set_errno(EFMD_LOG_EXACCT));
	}

	lp->log_flags |= FMD_LF_EAOPEN;
	return (0);
}

static fmd_log_t *
fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags)
{
	fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP);

	char buf[PATH_MAX];
	char *slash = "/";
	size_t len;
	int err;

	(void) pthread_mutex_init(&lp->log_lock, NULL);
	(void) pthread_cond_init(&lp->log_cv, NULL);
	(void) pthread_mutex_lock(&lp->log_lock);

	if (strcmp(root, "") == 0)
		slash = "";
	len = strlen(root) + strlen(name) + strlen(slash) + 1; /* for "\0" */
	lp->log_name = fmd_alloc(len, FMD_SLEEP);
	(void) snprintf(lp->log_name, len, "%s%s%s", root, slash, name);
	lp->log_tag = fmd_strdup(tag, FMD_SLEEP);
	(void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree);

	if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0)
		lp->log_flags |= FMD_LF_REPLAY;

	if (strcmp(lp->log_tag, FMD_LOG_XPRT) == 0)
		oflags &= ~O_SYNC;

top:
	if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 ||
	    fstat64(lp->log_fd, &lp->log_stat) == -1) {
		fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name);
		fmd_log_close(lp);
		return (NULL);
	}

	/*
	 * If our open() created the log file, use libexacct to write a header
	 * and position the file just after the header (EO_TAIL). If the log
	 * file already existed, use libexacct to validate the header and again
	 * position the file just after the header (EO_HEAD). Note that we lie
	 * to libexacct about 'oflags' in order to achieve the desired result.
	 */
	if (lp->log_stat.st_size == 0) {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL,
		    O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag);
	} else {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD,
		    O_RDONLY) || fmd_log_check_hdr(lp, tag);
	}

	/*
	 * If ea_fdopen() failed and the log was pre-existing, attempt to move
	 * it aside and start a new one. If we created the log but failed to
	 * initialize it, then we have no choice but to give up (e.g. EROFS).
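	 * For example, a pre-existing log whose header fails validation is
	 * renamed by appending '-' to its name, and if 'oflags' includes
	 * O_CREAT we then loop back to 'top' to create a fresh log at the
	 * original path.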
	 */
	if (err) {
		fmd_error(EFMD_LOG_OPEN,
		    "failed to initialize log %s", lp->log_name);

		if (lp->log_flags & FMD_LF_EAOPEN) {
			lp->log_flags &= ~FMD_LF_EAOPEN;
			(void) ea_close(&lp->log_ea);
		}

		(void) close(lp->log_fd);
		lp->log_fd = -1;

		if (lp->log_stat.st_size != 0 && snprintf(buf,
		    sizeof (buf), "%s-", lp->log_name) < PATH_MAX &&
		    rename(lp->log_name, buf) == 0) {
			TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf));
			if (oflags & O_CREAT)
				goto top;
		}

		fmd_log_close(lp);
		return (NULL);
	}

	lp->log_refs++;
	(void) pthread_mutex_unlock(&lp->log_lock);

	return (lp);
}

fmd_log_t *
fmd_log_tryopen(const char *root, const char *name, const char *tag)
{
	return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC));
}

fmd_log_t *
fmd_log_open(const char *root, const char *name, const char *tag)
{
	return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC));
}

void
fmd_log_close(fmd_log_t *lp)
{
	ASSERT(MUTEX_HELD(&lp->log_lock));
	ASSERT(lp->log_refs == 0);

	if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) {
		fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n",
		    lp->log_name, fmd_ea_strerror(ea_error()));
	}

	if (lp->log_fd >= 0 && close(lp->log_fd) != 0) {
		fmd_error(EFMD_LOG_CLOSE,
		    "failed to close log %s", lp->log_name);
	}

	fmd_strfree(lp->log_name);
	fmd_strfree(lp->log_tag);
	if (lp->log_uuid != NULL)
		fmd_free(lp->log_uuid, lp->log_uuidlen + 1);

	fmd_free(lp, sizeof (fmd_log_t));
}

void
fmd_log_hold_pending(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);

	lp->log_refs++;
	ASSERT(lp->log_refs != 0);

	if (lp->log_flags & FMD_LF_REPLAY) {
		lp->log_pending++;
		ASSERT(lp->log_pending != 0);
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
}

void
fmd_log_hold(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);
	lp->log_refs++;
	ASSERT(lp->log_refs != 0);
	(void) pthread_mutex_unlock(&lp->log_lock);
}

void
fmd_log_rele(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);
	ASSERT(lp->log_refs != 0);

	if (--lp->log_refs == 0)
		fmd_log_close(lp);
	else
		(void) pthread_mutex_unlock(&lp->log_lock);
}

void
fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	int err = 0;

	ea_object_t grp0, grp1, i0, i1, i2, *items;
	ea_object_t **fe = NULL;
	size_t nvsize, easize, itsize, frsize;
	char *nvbuf, *eabuf;
	statvfs64_t stv;

	(void) pthread_mutex_lock(&ep->ev_lock);

	ASSERT(ep->ev_flags & FMD_EVF_VOLATILE);
	ASSERT(ep->ev_log == NULL);

	(void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR);
	nvbuf = fmd_alloc(nvsize, FMD_SLEEP);
	(void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);

	if (lp->log_flags & FMD_LF_REPLAY)
		err |= ea_set_group(&grp0, CAT_FMA_RGROUP);
	else
		err |= ea_set_group(&grp0, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0);
	err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0);
	err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize);

	if (err != 0) {
		(void) pthread_mutex_unlock(&ep->ev_lock);
		err = EFMD_LOG_EXACCT;
		goto exerr;
	}

	(void) ea_attach_to_group(&grp0, &i0);
	(void) ea_attach_to_group(&grp0, &i1);
	(void) ea_attach_to_group(&grp0, &i2);

	/*
	 * If this event has a case associated with it (i.e. it is a list),
	 * then allocate a block of ea_object_t's and fill in a group for
	 * each event saved in the case's item list. For each such group,
	 * we attach it to grp1, which in turn will be attached to grp0.
	 */
	if (cp != NULL) {
		ea_object_t *egrp, *ip, **fp;
		fmd_event_impl_t *eip;
		fmd_case_item_t *cit;

		(void) ea_set_group(&grp1, CAT_FMA_GROUP);
		frsize = sizeof (ea_object_t *) * cip->ci_nitems;
		itsize = sizeof (ea_object_t) * cip->ci_nitems * 5;
		items = ip = fmd_alloc(itsize, FMD_SLEEP);

		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
			major_t maj;
			minor_t min;

			eip = (fmd_event_impl_t *)cit->cit_event;

			if (eip->ev_log == NULL)
				continue; /* event was never logged */

			maj = major(eip->ev_log->log_stat.st_dev);
			min = minor(eip->ev_log->log_stat.st_dev);

			(void) ea_set_group(ip, CAT_FMA_GROUP);
			egrp = ip++; /* first obj is group */

			/*
			 * If the event log file is in legacy format, write
			 * the xref to the file using the legacy maj/min/inode
			 * encoding; otherwise write it using the file uuid.
			 */
			if (eip->ev_log->log_uuid == NULL) {
				(void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_INODE,
				    &eip->ev_log->log_stat.st_ino, 0);
				(void) ea_attach_to_group(egrp, ip++);
			} else {
				if (ea_set_item(ip, CAT_FMA_UUID,
				    eip->ev_log->log_uuid, 0) == -1) {
					(void) pthread_mutex_unlock(
					    &ep->ev_lock);
					err = EFMD_LOG_EXACCT;
					goto exerrcp;
				}
				if (fe == NULL)
					fe = fp = fmd_zalloc(frsize, FMD_SLEEP);
				*fp++ = ip;
				(void) ea_attach_to_group(egrp, ip++);
			}
			(void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0);
			(void) ea_attach_to_group(egrp, ip++);
			(void) ea_attach_to_group(&grp1, egrp);
		}
		(void) ea_attach_to_group(&grp0, &grp1);
	}

	easize = ea_pack_object(&grp0, NULL, 0);
	eabuf = fmd_alloc(easize, FMD_SLEEP);
	(void) ea_pack_object(&grp0, eabuf, easize);

	/*
	 * Before writing the record, check to see if this would cause the free
	 * space in the filesystem to drop below our minfree threshold. If so,
	 * don't bother attempting the write and instead pretend it failed. As
	 * fmd(8) runs as root, it will be able to access the space "reserved"
	 * for root, and therefore can run the system out of disk space in a
	 * heavy error load situation, violating the basic design principle of
	 * fmd(8) that we don't want to make a bad situation even worse.
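	 * For example, with log.minfree set to 2MB, a 4KB record is only
	 * written if statvfs64() reports at least 2MB + 4KB of available
	 * space (f_bavail * f_frsize); otherwise we fail the append with
	 * EFMD_LOG_MINFREE as if the write itself had failed.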
	 */
	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 &&
	    stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) {

		TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag));
		err = EFMD_LOG_MINFREE;

	} else if (fmd_log_write(lp, eabuf, easize) == easize) {
		TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx",
		    lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off));

		ep->ev_flags &= ~FMD_EVF_VOLATILE;
		ep->ev_log = lp;
		ep->ev_off = lp->log_off;
		ep->ev_len = easize;

		if (lp->log_flags & FMD_LF_REPLAY) {
			lp->log_pending++;
			ASSERT(lp->log_pending != 0);
		}

		lp->log_refs++;
		ASSERT(lp->log_refs != 0);
		lp->log_off += easize;
	} else {
		err = errno; /* save errno for fmd_error() call below */

		/*
		 * If we can't append the record, seek the file back to the
		 * original location and truncate it there in order to make
		 * sure the file is always in a sane state w.r.t. libexacct.
		 */
		(void) lseek64(lp->log_fd, lp->log_off, SEEK_SET);
		(void) ftruncate64(lp->log_fd, lp->log_off);
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
	(void) pthread_mutex_unlock(&ep->ev_lock);

	fmd_free(eabuf, easize);

exerrcp:
	if (cp != NULL) {
		if (fe != NULL) {
			ea_object_t **fp = fe;
			int i = 0;

			for (; *fp != NULL && i < cip->ci_nitems; i++)
				(void) ea_free_item(*fp++, EUP_ALLOC);
			fmd_free(fe, frsize);
		}

		fmd_free(items, itsize);
	}

exerr:
	fmd_free(nvbuf, nvsize);

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);

	/*
	 * Keep track of out-of-space errors using global statistics. As we're
	 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged.
	 */
	if (err == ENOSPC || err == EFMD_LOG_MINFREE) {
		fmd_stat_t *sp;

		if (lp == fmd.d_errlog)
			sp = &fmd.d_stats->ds_err_enospc;
		else if (lp == fmd.d_fltlog)
			sp = &fmd.d_stats->ds_flt_enospc;
		else
			sp = &fmd.d_stats->ds_oth_enospc;

		(void) pthread_mutex_lock(&fmd.d_stats_lock);
		sp->fmds_value.ui64++;
		(void) pthread_mutex_unlock(&fmd.d_stats_lock);
	}

	if (err != 0) {
		fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n",
		    lp->log_tag, (void *)ep, fmd_strerror(err));
	}
}

/*
 * Commit an event to the log permanently, indicating that it should not be
 * replayed on restart. This is done by overwriting the event group's catalog
 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()). We
 * use pwrite64() to update the existing word directly, using somewhat guilty
 * knowledge that exacct stores the 32-bit catalog word first for each object.
 * Since we are overwriting an existing log location using pwrite64() and hold
 * the event lock, we do not need to hold the log_lock during the i/o.
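 * For example, an event appended to the error log begins with the packed
 * 32-bit word (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA); committing it
 * rewrites just those four bytes at ev_off to
 * (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA), i.e. CAT_FMA_GROUP, so a
 * subsequent replay will no longer select the record.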
 */
void
fmd_log_commit(fmd_log_t *lp, fmd_event_t *e)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	ea_catalog_t c;

	if (!(lp->log_flags & FMD_LF_REPLAY))
		return; /* log does not require replay tagging */

	ASSERT(MUTEX_HELD(&ep->ev_lock));
	ASSERT(ep->ev_log == lp && ep->ev_off != 0);

	c = CAT_FMA_GROUP;
	exacct_order32(&c);

	if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) {
		TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep));
		ep->ev_flags &= ~FMD_EVF_REPLAY;

		/*
		 * If we have committed the event, check to see if the TOC skip
		 * offset needs to be updated, and decrement the pending count.
		 */
		(void) pthread_mutex_lock(&lp->log_lock);

		if (lp->log_skip == ep->ev_off) {
			lp->log_flags |= FMD_LF_DIRTY;
			lp->log_skip += ep->ev_len;
		}

		ASSERT(lp->log_pending != 0);
		lp->log_pending--;

		(void) pthread_cond_broadcast(&lp->log_cv);
		(void) pthread_mutex_unlock(&lp->log_lock);

	} else {
		fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n",
		    lp->log_tag, (void *)ep, fmd_strerror(errno));
	}
}

/*
 * If we need to destroy an event and it wasn't able to be committed, we permit
 * the owner to decommit from ever trying again. This operation decrements the
 * pending count on the log and broadcasts to anyone waiting on log_cv.
 */
void
fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;

	if (!(lp->log_flags & FMD_LF_REPLAY))
		return; /* log does not require replay tagging */

	ASSERT(MUTEX_HELD(&ep->ev_lock));
	ASSERT(ep->ev_log == lp);

	(void) pthread_mutex_lock(&lp->log_lock);

	TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep));
	ep->ev_flags &= ~FMD_EVF_REPLAY;

	ASSERT(lp->log_pending != 0);
	lp->log_pending--;

	(void) pthread_cond_broadcast(&lp->log_cv);
	(void) pthread_mutex_unlock(&lp->log_lock);
}

static fmd_event_t *
fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off)
{
	fmd_timeval_t ftv = { -1ULL, -1ULL };
	nvlist_t *nvl = NULL;

	ea_object_t *obj;
	char *class;
	int err;

	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
		switch (obj->eo_catalog) {
		case CAT_FMA_NVLIST:
			if ((err = nvlist_xunpack(obj->eo_item.ei_raw,
			    obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) {
				fmd_error(EFMD_LOG_UNPACK, "failed to unpack "
				    "log nvpair: %s\n", fmd_strerror(err));
				return (NULL);
			}
			break;

		case CAT_FMA_TODSEC:
			ftv.ftv_sec = obj->eo_item.ei_uint64;
			break;

		case CAT_FMA_TODNSEC:
			ftv.ftv_nsec = obj->eo_item.ei_uint64;
			break;
		}
	}

	if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) {
		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
		    "required object(s) missing from record group\n");
		nvlist_free(nvl);
		return (NULL);
	}

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
		    "record is missing required '%s' nvpair\n", FM_CLASS);
		nvlist_free(nvl);
		return (NULL);
	}

	return (fmd_event_recreate(FMD_EVT_PROTOCOL,
	    &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0)));
}

/*
 * Replay event(s) from the specified log by invoking the specified callback
 * function 'func' for each event.
 * If the log has the FMD_LF_REPLAY flag set, we replay all events after
 * log_skip that have the FMA_RGROUP group tag. This mode is used for the
 * error telemetry log. If the log does not have this flag set (used for
 * ASRU logs), only the most recent event is replayed.
 */
void
fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data)
{
	ea_object_t obj, *grp;
	ea_object_type_t type;
	ea_catalog_t c;
	fmd_event_t *ep;
	off64_t off, skp;
	uint_t n = 0;

	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* we just created this log: never replay events */
	}

	while (lp->log_flags & FMD_LF_BUSY)
		(void) pthread_cond_wait(&lp->log_cv, &lp->log_lock);

	if (lp->log_off == lp->log_beg) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* no records appended yet */
	}

	lp->log_flags |= FMD_LF_BUSY;
	skp = lp->log_skip;
	ea_clear(&lp->log_ea); /* resync exacct file */

	/*
	 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it
	 * is non-zero) or at log_beg. Otherwise replay from the end (log_off).
	 */
	if (lp->log_flags & FMD_LF_REPLAY) {
		off = MAX(lp->log_beg, lp->log_skip);
		c = CAT_FMA_RGROUP;
	} else {
		off = lp->log_off;
		c = CAT_FMA_GROUP;
	}

	if (lseek64(lp->log_fd, off, SEEK_SET) != off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)off);
	}

	/*
	 * If FMD_LF_REPLAY is not set, back up to the start of the previous
	 * object and make sure this object is an EO_GROUP; otherwise return.
	 */
	if (!(lp->log_flags & FMD_LF_REPLAY) &&
	    (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) {
		fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected "
		    "type %d (log may be truncated or corrupt)\n", type);
		goto out;
	}

	while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) {
		if (!(lp->log_flags & FMD_LF_REPLAY))
			off -= ea_pack_object(grp, NULL, 0);
		else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP)
			skp = off; /* update skip */

		/*
		 * We temporarily drop log_lock around the call to unpack the
		 * event, hold it, and perform the callback, because these
		 * operations may try to acquire log_lock to bump log_refs.
		 * We cannot lose control because the FMD_LF_BUSY flag is set.
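		 * (For example, fmd_log_unpack() below ends in
		 * fmd_event_recreate(), which associates the new event with
		 * this log and so may need to take log_lock to bump
		 * log_refs.)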
		 */
		(void) pthread_mutex_unlock(&lp->log_lock);

		if (grp->eo_catalog == c &&
		    (ep = fmd_log_unpack(lp, grp, off)) != NULL) {

			TRACE((FMD_DBG_LOG, "replay %s %p off %llx",
			    lp->log_tag, (void *)ep, (u_longlong_t)off));

			fmd_event_hold(ep);
			func(lp, ep, data);
			fmd_event_rele(ep);
			n++;
		}

		(void) pthread_mutex_lock(&lp->log_lock);
		off += ea_pack_object(grp, NULL, 0);
		ea_free_object(grp, EUP_ALLOC);
	}

	if (ea_error() != EXR_EOF) {
		fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at "
		    "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off,
		    fmd_ea_strerror(ea_error()));
	}

	if (n == 0)
		skp = off; /* if no replays, move skip to where we ended up */

out:
	if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)lp->log_off);
	}

	if (skp != lp->log_skip) {
		lp->log_flags |= FMD_LF_DIRTY;
		lp->log_skip = skp;
	}

	lp->log_flags &= ~FMD_LF_BUSY;
	(void) pthread_cond_broadcast(&lp->log_cv);
	(void) pthread_mutex_unlock(&lp->log_lock);
}

void
fmd_log_update(fmd_log_t *lp)
{
	ea_object_t toc, item;
	off64_t skip = 0;
	size_t size;
	void *buf;

	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_flags & FMD_LF_DIRTY) {
		lp->log_flags &= ~FMD_LF_DIRTY;
		skip = lp->log_skip;
	}

	(void) pthread_mutex_unlock(&lp->log_lock);

	/*
	 * If the skip needs to be updated, construct a TOC record group
	 * containing the skip offset and overwrite the TOC in-place.
	 */
	if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 &&
	    ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) {

		(void) ea_attach_to_group(&toc, &item);
		size = ea_pack_object(&toc, NULL, 0);
		buf = fmd_alloc(size, FMD_SLEEP);

		(void) ea_pack_object(&toc, buf, size);
		ASSERT(lp->log_toc + size == lp->log_beg);

		if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) {
			TRACE((FMD_DBG_LOG, "updated skip to %llx", skip));
		} else {
			fmd_error(EFMD_LOG_UPDATE,
			    "failed to log_update %s", lp->log_tag);
		}

		fmd_free(buf, size);
		(void) ea_free_item(&item, EUP_ALLOC);
	}
}

/*
 * Rotate the specified log by renaming its underlying file to a staging file
 * that can be handed off to logadm(8) or an administrator script. If the
 * rename succeeds, open a new log file using the old path and return it.
 * Note that we are relying on our caller to use some higher-level mechanism
 * to ensure that fmd_log_rotate() cannot be called while other threads are
 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used
 * for the global errlog and fltlog).
 */
fmd_log_t *
fmd_log_rotate(fmd_log_t *lp)
{
	char npath[PATH_MAX];
	fmd_log_t *nlp;

	(void) snprintf(npath, sizeof (npath), "%s+", lp->log_name);

	/*
	 * Open new log file.
	 */
	if ((nlp = fmd_log_open("", npath, lp->log_tag)) == NULL) {
		fmd_error(EFMD_LOG_ROTATE, "failed to open %s", npath);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	(void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name);
	(void) pthread_mutex_lock(&lp->log_lock);

	/*
	 * Check for any pending commits to drain before proceeding.
	 * We can't rotate the log out if commits are pending because if we
	 * die after the log is moved aside, we won't be able to replay them
	 * on restart.
	 */
	if (lp->log_pending != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTBUSY);
		return (NULL);
	}

	if (rename(lp->log_name, npath) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	if (rename(nlp->log_name, lp->log_name) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s",
		    nlp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	/*
	 * Change name of new log file.
	 */
	fmd_strfree(nlp->log_name);
	nlp->log_name = fmd_strdup(lp->log_name, FMD_SLEEP);

	/*
	 * If we've rotated the log, no pending events exist so we don't have
	 * any more commits coming, and our caller should have arranged for
	 * no more calls to append. As such, we can close log_fd for good.
	 */
	if (lp->log_flags & FMD_LF_EAOPEN) {
		(void) ea_close(&lp->log_ea);
		lp->log_flags &= ~FMD_LF_EAOPEN;
	}

	(void) close(lp->log_fd);
	lp->log_fd = -1;

	(void) pthread_mutex_unlock(&lp->log_lock);
	return (nlp);
}