1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * FMD Log File Subsystem 31 * 32 * Events are written to one of two log files as they are received or created; 33 * the error log tracks all ereport.* events received on the inbound event 34 * transport, and the fault log tracks all list.* events generated by fmd or 35 * its client modules. In addition, we use the same log file format to cache 36 * state and events associated with ASRUs that are named in a diagnosis. 37 * 38 * The log files use the exacct format manipulated by libexacct(3LIB) and 39 * originally defined in PSARC 1999/119. However, the exacct library was 40 * designed primarily for read-only clients and without the synchronous i/o 41 * considerations and seeking required for fmd, so we use libexacct here only 42 * to read and write the file headers and to pack data from memory into a file 43 * bytestream. All of the i/o and file offset manipulations are performed by 44 * the fmd code below. Our exacct file management uses the following grammar: 45 * 46 * file := hdr toc event* 47 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER 48 * EXD_FMA_PLAT EXD_FMA_UUID 49 * toc := EXD_FMA_OFFSET 50 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* or legacy evref 51 * evref := EXD_FMA_UUID EXD_FMA_OFFSET 52 * legacy evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET 53 * 54 * Any event can be uniquely identified by the tuple (file, offset) where file 55 * is encoded as (uuid) when we are cross-linking files. For legacy file 56 * formats we still support encoding the reference as (major, minor, inode). 57 * Note that we break out of the file's dev_t into its two 32-bit components to 58 * permit development of either 32-bit or 64-bit log readers and writers; the 59 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way 60 * for a 32-bit application to retrieve and store a 64-bit dev_t. 61 * 62 * In order to replay events in the event of an fmd crash, events are initially 63 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the 64 * fmd_log_append() function. Later, once an event transitions from the 65 * received state to one of its other states (see fmd_event.c for details), 66 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating 67 * that the event is fully processed and no longer needs to be replayed. 68 */ 69 70 #include <sys/types.h> 71 #include <sys/mkdev.h> 72 #include <sys/statvfs.h> 73 #include <sys/fm/protocol.h> 74 #include <sys/exacct_impl.h> 75 #include <uuid/uuid.h> 76 77 #include <unistd.h> 78 #include <limits.h> 79 #include <fcntl.h> 80 #include <ctype.h> 81 82 #include <fmd_alloc.h> 83 #include <fmd_error.h> 84 #include <fmd_string.h> 85 #include <fmd_event.h> 86 #include <fmd_conf.h> 87 #include <fmd_subr.h> 88 #include <fmd_case.h> 89 #include <fmd_log.h> 90 91 #include <fmd.h> 92 93 #define CAT_FMA_RGROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA) 94 #define CAT_FMA_GROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA) 95 96 #define CAT_FMA_LABEL (EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL) 97 #define CAT_FMA_VERSION (EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION) 98 #define CAT_FMA_OSREL (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL) 99 #define CAT_FMA_OSVER (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER) 100 #define CAT_FMA_PLAT (EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT) 101 #define CAT_FMA_UUID (EXT_STRING | EXC_DEFAULT | EXD_FMA_UUID) 102 #define CAT_FMA_TODSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC) 103 #define CAT_FMA_TODNSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC) 104 #define CAT_FMA_NVLIST (EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST) 105 #define CAT_FMA_MAJOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR) 106 #define CAT_FMA_MINOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR) 107 #define CAT_FMA_INODE (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE) 108 #define CAT_FMA_OFFSET (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET) 109 110 static ssize_t 111 fmd_log_write(fmd_log_t *lp, const void *buf, size_t n) 112 { 113 ssize_t resid = n; 114 ssize_t len; 115 116 ASSERT(MUTEX_HELD(&lp->log_lock)); 117 118 while (resid != 0) { 119 if ((len = write(lp->log_fd, buf, resid)) <= 0) 120 break; 121 122 resid -= len; 123 buf = (char *)buf + len; 124 } 125 126 if (resid == n && n != 0) 127 return (-1); 128 129 return (n - resid); 130 } 131 132 static int 133 fmd_log_write_hdr(fmd_log_t *lp, const char *tag) 134 { 135 ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5, i6; 136 const char *osrel, *osver, *plat; 137 off64_t off = 0; 138 int err = 0; 139 uuid_t uuid; 140 141 (void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel); 142 (void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver); 143 (void) fmd_conf_getprop(fmd.d_conf, "platform", &plat); 144 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &lp->log_uuidlen); 145 146 lp->log_uuid = fmd_zalloc(lp->log_uuidlen + 1, FMD_SLEEP); 147 uuid_generate(uuid); 148 uuid_unparse(uuid, lp->log_uuid); 149 150 err |= ea_set_group(&hdr, CAT_FMA_GROUP); 151 err |= ea_set_group(&toc, CAT_FMA_GROUP); 152 153 err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0); 154 err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0); 155 err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0); 156 err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0); 157 err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0); 158 err |= ea_set_item(&i5, CAT_FMA_UUID, lp->log_uuid, 0); 159 err |= ea_set_item(&i6, CAT_FMA_OFFSET, &off, 0); 160 161 (void) ea_attach_to_group(&hdr, &i0); 162 (void) ea_attach_to_group(&hdr, &i1); 163 (void) ea_attach_to_group(&hdr, &i2); 164 (void) ea_attach_to_group(&hdr, &i3); 165 (void) ea_attach_to_group(&hdr, &i4); 166 (void) ea_attach_to_group(&hdr, &i5); 167 (void) ea_attach_to_group(&toc, &i6); 168 169 if (err == 0) { 170 size_t hdr_size = ea_pack_object(&hdr, NULL, 0); 171 size_t toc_size = ea_pack_object(&toc, NULL, 0); 172 173 size_t size = hdr_size + toc_size; 174 void *buf = fmd_alloc(size, FMD_SLEEP); 175 176 (void) ea_pack_object(&hdr, buf, hdr_size); 177 (void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size); 178 179 if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L) 180 fmd_panic("failed to seek log %s", lp->log_name); 181 182 if (fmd_log_write(lp, buf, size) != size) 183 err = errno; /* save errno for fmd_set_errno() below */ 184 185 fmd_free(buf, size); 186 187 lp->log_toc = lp->log_off + hdr_size; 188 lp->log_beg = lp->log_off + hdr_size + toc_size; 189 lp->log_off = lp->log_off + hdr_size + toc_size; 190 191 if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END)) 192 fmd_panic("eof off != log_off 0x%llx\n", lp->log_off); 193 } else 194 err = EFMD_LOG_EXACCT; 195 196 (void) ea_free_item(&i0, EUP_ALLOC); 197 (void) ea_free_item(&i1, EUP_ALLOC); 198 (void) ea_free_item(&i2, EUP_ALLOC); 199 (void) ea_free_item(&i3, EUP_ALLOC); 200 (void) ea_free_item(&i4, EUP_ALLOC); 201 (void) ea_free_item(&i5, EUP_ALLOC); 202 (void) ea_free_item(&i6, EUP_ALLOC); 203 204 return (err ? fmd_set_errno(err) : 0); 205 } 206 207 static int 208 fmd_log_check_err(fmd_log_t *lp, int err, const char *msg) 209 { 210 int eaerr = ea_error(); 211 char buf[BUFSIZ]; 212 213 (void) snprintf(buf, sizeof (buf), "%s: %s: %s\n", 214 lp->log_name, msg, eaerr != EXR_OK ? 215 fmd_ea_strerror(eaerr) : "catalog tag mismatch"); 216 217 fmd_error(err, buf); 218 return (fmd_set_errno(err)); 219 } 220 221 static int 222 fmd_log_check_hdr(fmd_log_t *lp, const char *tag) 223 { 224 int got_version = 0, got_label = 0; 225 ea_object_t *grp, *obj; 226 off64_t hdr_off, hdr_size; 227 int dvers, fvers; 228 const char *p; 229 230 ea_clear(&lp->log_ea); /* resync exacct file */ 231 232 if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L) 233 fmd_panic("failed to seek log %s", lp->log_name); 234 235 /* 236 * Read the first group of log meta-data: the write-once read-only 237 * file header. We read all records in this group, ignoring all but 238 * the VERSION and LABEL, which are required and must be verified. 239 */ 240 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL || 241 grp->eo_catalog != CAT_FMA_GROUP) { 242 ea_free_object(grp, EUP_ALLOC); 243 return (fmd_log_check_err(lp, EFMD_LOG_INVAL, 244 "invalid fma hdr record group")); 245 } 246 247 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) { 248 switch (obj->eo_catalog) { 249 case CAT_FMA_VERSION: 250 for (dvers = 0, p = fmd.d_version; 251 *p != '\0'; p++) { 252 if (isdigit(*p)) 253 dvers = dvers * 10 + (*p - '0'); 254 else 255 break; 256 } 257 258 for (fvers = 0, p = obj->eo_item.ei_string; 259 *p != '\0'; p++) { 260 if (isdigit(*p)) 261 fvers = fvers * 10 + (*p - '0'); 262 else 263 break; 264 } 265 266 if (fvers > dvers) { 267 fmd_error(EFMD_LOG_INVAL, "%s: log version " 268 "%s is not supported by this daemon\n", 269 lp->log_name, obj->eo_item.ei_string); 270 ea_free_object(grp, EUP_ALLOC); 271 return (fmd_set_errno(EFMD_LOG_VERSION)); 272 } 273 274 got_version++; 275 break; 276 277 case CAT_FMA_LABEL: 278 if (strcmp(obj->eo_item.ei_string, tag) != 0) { 279 fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' " 280 "does not matched expected tag '%s'\n", 281 lp->log_name, obj->eo_item.ei_string, tag); 282 ea_free_object(grp, EUP_ALLOC); 283 return (fmd_set_errno(EFMD_LOG_INVAL)); 284 } 285 got_label++; 286 break; 287 case CAT_FMA_UUID: 288 lp->log_uuid = fmd_strdup(obj->eo_item.ei_string, 289 FMD_SLEEP); 290 lp->log_uuidlen = strlen(lp->log_uuid); 291 break; 292 } 293 } 294 295 hdr_size = ea_pack_object(grp, NULL, 0); 296 ea_free_object(grp, EUP_ALLOC); 297 298 if (!got_version || !got_label) { 299 fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not " 300 "include mandatory version and/or label\n", lp->log_name); 301 return (fmd_set_errno(EFMD_LOG_INVAL)); 302 } 303 304 /* 305 * Read the second group of log meta-data: the table of contents. We 306 * expect this group to contain an OFFSET object indicating the current 307 * value of log_skip. We save this in our fmd_log_t and then return. 308 */ 309 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL || 310 grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 || 311 grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) { 312 ea_free_object(grp, EUP_ALLOC); 313 return (fmd_log_check_err(lp, EFMD_LOG_INVAL, 314 "invalid fma toc record group")); 315 } 316 317 lp->log_toc = hdr_off + hdr_size; 318 lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0); 319 lp->log_off = lseek64(lp->log_fd, 0, SEEK_END); 320 lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64; 321 322 if (lp->log_skip > lp->log_off) { 323 fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; " 324 "resetting to zero\n", lp->log_name, lp->log_skip); 325 lp->log_skip = 0; 326 } 327 328 ea_free_object(grp, EUP_ALLOC); 329 return (0); 330 } 331 332 static int 333 fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags) 334 { 335 int fd = dup(lp->log_fd); 336 const char *creator; 337 338 (void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator); 339 340 if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) { 341 fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n", 342 lp->log_name, fmd_ea_strerror(ea_error())); 343 (void) close(fd); 344 return (fmd_set_errno(EFMD_LOG_EXACCT)); 345 } 346 347 lp->log_flags |= FMD_LF_EAOPEN; 348 return (0); 349 } 350 351 static fmd_log_t * 352 fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags) 353 { 354 fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP); 355 356 char buf[PATH_MAX]; 357 char *slash = "/"; 358 size_t len; 359 int err; 360 361 (void) pthread_mutex_init(&lp->log_lock, NULL); 362 (void) pthread_cond_init(&lp->log_cv, NULL); 363 (void) pthread_mutex_lock(&lp->log_lock); 364 365 if (strcmp(root, "") == 0) 366 slash = ""; 367 len = strlen(root) + strlen(name) + strlen(slash) + 1; /* for "\0" */ 368 lp->log_name = fmd_alloc(len, FMD_SLEEP); 369 (void) snprintf(lp->log_name, len, "%s%s%s", root, slash, name); 370 lp->log_tag = fmd_strdup(tag, FMD_SLEEP); 371 (void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree); 372 373 if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0) 374 lp->log_flags |= FMD_LF_REPLAY; 375 376 if (strcmp(lp->log_tag, FMD_LOG_XPRT) == 0) 377 oflags &= ~O_SYNC; 378 379 top: 380 if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 || 381 fstat64(lp->log_fd, &lp->log_stat) == -1) { 382 fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name); 383 fmd_log_close(lp); 384 return (NULL); 385 } 386 387 /* 388 * If our open() created the log file, use libexacct to write a header 389 * and position the file just after the header (EO_TAIL). If the log 390 * file already existed, use libexacct to validate the header and again 391 * position the file just after the header (EO_HEAD). Note that we lie 392 * to libexacct about 'oflags' in order to achieve the desired result. 393 */ 394 if (lp->log_stat.st_size == 0) { 395 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL, 396 O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag); 397 } else { 398 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD, 399 O_RDONLY) || fmd_log_check_hdr(lp, tag); 400 } 401 402 /* 403 * If ea_fdopen() failed and the log was pre-existing, attempt to move 404 * it aside and start a new one. If we created the log but failed to 405 * initialize it, then we have no choice but to give up (e.g. EROFS). 406 */ 407 if (err) { 408 fmd_error(EFMD_LOG_OPEN, 409 "failed to initialize log %s", lp->log_name); 410 411 if (lp->log_flags & FMD_LF_EAOPEN) { 412 lp->log_flags &= ~FMD_LF_EAOPEN; 413 (void) ea_close(&lp->log_ea); 414 } 415 416 (void) close(lp->log_fd); 417 lp->log_fd = -1; 418 419 if (lp->log_stat.st_size != 0 && snprintf(buf, 420 sizeof (buf), "%s-", lp->log_name) < PATH_MAX && 421 rename(lp->log_name, buf) == 0) { 422 TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf)); 423 if (oflags & O_CREAT) 424 goto top; 425 } 426 427 fmd_log_close(lp); 428 return (NULL); 429 } 430 431 lp->log_refs++; 432 (void) pthread_mutex_unlock(&lp->log_lock); 433 434 return (lp); 435 } 436 437 fmd_log_t * 438 fmd_log_tryopen(const char *root, const char *name, const char *tag) 439 { 440 return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC)); 441 } 442 443 fmd_log_t * 444 fmd_log_open(const char *root, const char *name, const char *tag) 445 { 446 return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC)); 447 } 448 449 void 450 fmd_log_close(fmd_log_t *lp) 451 { 452 ASSERT(MUTEX_HELD(&lp->log_lock)); 453 ASSERT(lp->log_refs == 0); 454 455 if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) { 456 fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n", 457 lp->log_name, fmd_ea_strerror(ea_error())); 458 } 459 460 if (lp->log_fd >= 0 && close(lp->log_fd) != 0) { 461 fmd_error(EFMD_LOG_CLOSE, 462 "failed to close log %s", lp->log_name); 463 } 464 465 fmd_strfree(lp->log_name); 466 fmd_strfree(lp->log_tag); 467 if (lp->log_uuid != NULL) 468 fmd_free(lp->log_uuid, lp->log_uuidlen + 1); 469 470 fmd_free(lp, sizeof (fmd_log_t)); 471 } 472 473 void 474 fmd_log_hold_pending(fmd_log_t *lp) 475 { 476 (void) pthread_mutex_lock(&lp->log_lock); 477 478 lp->log_refs++; 479 ASSERT(lp->log_refs != 0); 480 481 if (lp->log_flags & FMD_LF_REPLAY) { 482 lp->log_pending++; 483 ASSERT(lp->log_pending != 0); 484 } 485 486 (void) pthread_mutex_unlock(&lp->log_lock); 487 } 488 489 void 490 fmd_log_hold(fmd_log_t *lp) 491 { 492 (void) pthread_mutex_lock(&lp->log_lock); 493 lp->log_refs++; 494 ASSERT(lp->log_refs != 0); 495 (void) pthread_mutex_unlock(&lp->log_lock); 496 } 497 498 void 499 fmd_log_rele(fmd_log_t *lp) 500 { 501 (void) pthread_mutex_lock(&lp->log_lock); 502 ASSERT(lp->log_refs != 0); 503 504 if (--lp->log_refs == 0) 505 fmd_log_close(lp); 506 else 507 (void) pthread_mutex_unlock(&lp->log_lock); 508 } 509 510 void 511 fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp) 512 { 513 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 514 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 515 int err = 0; 516 517 ea_object_t grp0, grp1, i0, i1, i2, *items; 518 ea_object_t **fe = NULL; 519 size_t nvsize, easize, itsize, frsize; 520 char *nvbuf, *eabuf; 521 statvfs64_t stv; 522 523 (void) pthread_mutex_lock(&ep->ev_lock); 524 525 ASSERT(ep->ev_flags & FMD_EVF_VOLATILE); 526 ASSERT(ep->ev_log == NULL); 527 528 (void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR); 529 nvbuf = fmd_alloc(nvsize, FMD_SLEEP); 530 (void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0); 531 532 if (lp->log_flags & FMD_LF_REPLAY) 533 err |= ea_set_group(&grp0, CAT_FMA_RGROUP); 534 else 535 err |= ea_set_group(&grp0, CAT_FMA_GROUP); 536 537 err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0); 538 err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0); 539 err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize); 540 541 if (err != 0) { 542 (void) pthread_mutex_unlock(&ep->ev_lock); 543 err = EFMD_LOG_EXACCT; 544 goto exerr; 545 } 546 547 (void) ea_attach_to_group(&grp0, &i0); 548 (void) ea_attach_to_group(&grp0, &i1); 549 (void) ea_attach_to_group(&grp0, &i2); 550 551 /* 552 * If this event has a case associated with it (i.e. it is a list), 553 * then allocate a block of ea_object_t's and fill in a group for 554 * each event saved in the case's item list. For each such group, 555 * we attach it to grp1, which in turn will be attached to grp0. 556 */ 557 if (cp != NULL) { 558 ea_object_t *egrp, *ip, **fp; 559 fmd_event_impl_t *eip; 560 fmd_case_item_t *cit; 561 562 (void) ea_set_group(&grp1, CAT_FMA_GROUP); 563 frsize = sizeof (ea_object_t *) * cip->ci_nitems; 564 itsize = sizeof (ea_object_t) * cip->ci_nitems * 5; 565 items = ip = fmd_alloc(itsize, FMD_SLEEP); 566 567 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 568 major_t maj; 569 minor_t min; 570 571 eip = (fmd_event_impl_t *)cit->cit_event; 572 573 if (eip->ev_log == NULL) 574 continue; /* event was never logged */ 575 576 maj = major(eip->ev_log->log_stat.st_dev); 577 min = minor(eip->ev_log->log_stat.st_dev); 578 579 (void) ea_set_group(ip, CAT_FMA_GROUP); 580 egrp = ip++; /* first obj is group */ 581 582 /* 583 * If the event log file is in legacy format, 584 * then write the xref to the file in the legacy 585 * maj/min/inode method else write it using the 586 * file uuid. 587 */ 588 if (eip->ev_log->log_uuid == NULL) { 589 (void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0); 590 (void) ea_attach_to_group(egrp, ip++); 591 (void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0); 592 (void) ea_attach_to_group(egrp, ip++); 593 (void) ea_set_item(ip, CAT_FMA_INODE, 594 &eip->ev_log->log_stat.st_ino, 0); 595 (void) ea_attach_to_group(egrp, ip++); 596 } else { 597 if (ea_set_item(ip, CAT_FMA_UUID, 598 eip->ev_log->log_uuid, 0) == -1) { 599 err = EFMD_LOG_EXACCT; 600 goto exerrcp; 601 } 602 if (fe == NULL) 603 fe = fp = fmd_zalloc(frsize, FMD_SLEEP); 604 *fp++ = ip; 605 (void) ea_attach_to_group(egrp, ip++); 606 } 607 (void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0); 608 (void) ea_attach_to_group(egrp, ip++); 609 (void) ea_attach_to_group(&grp1, egrp); 610 } 611 (void) ea_attach_to_group(&grp0, &grp1); 612 } 613 614 easize = ea_pack_object(&grp0, NULL, 0); 615 eabuf = fmd_alloc(easize, FMD_SLEEP); 616 (void) ea_pack_object(&grp0, eabuf, easize); 617 618 /* 619 * Before writing the record, check to see if this would cause the free 620 * space in the filesystem to drop below our minfree threshold. If so, 621 * don't bother attempting the write and instead pretend it failed. As 622 * fmd(1M) runs as root, it will be able to access the space "reserved" 623 * for root, and therefore can run the system of out of disk space in a 624 * heavy error load situation, violating the basic design principle of 625 * fmd(1M) that we don't want to make a bad situation even worse. 626 */ 627 (void) pthread_mutex_lock(&lp->log_lock); 628 629 if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 && 630 stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) { 631 632 TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag)); 633 err = EFMD_LOG_MINFREE; 634 635 } else if (fmd_log_write(lp, eabuf, easize) == easize) { 636 TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx", 637 lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off)); 638 639 ep->ev_flags &= ~FMD_EVF_VOLATILE; 640 ep->ev_log = lp; 641 ep->ev_off = lp->log_off; 642 ep->ev_len = easize; 643 644 if (lp->log_flags & FMD_LF_REPLAY) { 645 lp->log_pending++; 646 ASSERT(lp->log_pending != 0); 647 } 648 649 lp->log_refs++; 650 ASSERT(lp->log_refs != 0); 651 lp->log_off += easize; 652 } else { 653 err = errno; /* save errno for fmd_error() call below */ 654 655 /* 656 * If we can't write append the record, seek the file back to 657 * the original location and truncate it there in order to make 658 * sure the file is always in a sane state w.r.t. libexacct. 659 */ 660 (void) lseek64(lp->log_fd, lp->log_off, SEEK_SET); 661 (void) ftruncate64(lp->log_fd, lp->log_off); 662 } 663 664 (void) pthread_mutex_unlock(&lp->log_lock); 665 (void) pthread_mutex_unlock(&ep->ev_lock); 666 667 fmd_free(eabuf, easize); 668 669 exerrcp: 670 if (cp != NULL) { 671 if (fe != NULL) { 672 ea_object_t **fp = fe; 673 int i = 0; 674 675 for (; *fp != NULL && i < cip->ci_nitems; i++) 676 (void) ea_free_item(*fp++, EUP_ALLOC); 677 fmd_free(fe, frsize); 678 } 679 680 fmd_free(items, itsize); 681 } 682 683 exerr: 684 fmd_free(nvbuf, nvsize); 685 686 (void) ea_free_item(&i0, EUP_ALLOC); 687 (void) ea_free_item(&i1, EUP_ALLOC); 688 (void) ea_free_item(&i2, EUP_ALLOC); 689 690 /* 691 * Keep track of out-of-space errors using global statistics. As we're 692 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged. 693 */ 694 if (err == ENOSPC || err == EFMD_LOG_MINFREE) { 695 fmd_stat_t *sp; 696 697 if (lp == fmd.d_errlog) 698 sp = &fmd.d_stats->ds_err_enospc; 699 else if (lp == fmd.d_fltlog) 700 sp = &fmd.d_stats->ds_flt_enospc; 701 else 702 sp = &fmd.d_stats->ds_oth_enospc; 703 704 (void) pthread_mutex_lock(&fmd.d_stats_lock); 705 sp->fmds_value.ui64++; 706 (void) pthread_mutex_unlock(&fmd.d_stats_lock); 707 } 708 709 if (err != 0) { 710 fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n", 711 lp->log_tag, (void *)ep, fmd_strerror(err)); 712 } 713 } 714 715 /* 716 * Commit an event to the log permanently, indicating that it should not be 717 * replayed on restart. This is done by overwriting the event group's catalog 718 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()). We 719 * use pwrite64() to update the existing word directly, using somewhat guilty 720 * knowledge that exacct stores the 32-bit catalog word first for each object. 721 * Since we are overwriting an existing log location using pwrite64() and hold 722 * the event lock, we do not need to hold the log_lock during the i/o. 723 */ 724 void 725 fmd_log_commit(fmd_log_t *lp, fmd_event_t *e) 726 { 727 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 728 ea_catalog_t c; 729 int err = 0; 730 731 if (!(lp->log_flags & FMD_LF_REPLAY)) 732 return; /* log does not require replay tagging */ 733 734 ASSERT(MUTEX_HELD(&ep->ev_lock)); 735 ASSERT(ep->ev_log == lp && ep->ev_off != 0); 736 737 c = CAT_FMA_GROUP; 738 exacct_order32(&c); 739 740 if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) { 741 TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep)); 742 ep->ev_flags &= ~FMD_EVF_REPLAY; 743 744 /* 745 * If we have committed the event, check to see if the TOC skip 746 * offset needs to be updated, and decrement the pending count. 747 */ 748 (void) pthread_mutex_lock(&lp->log_lock); 749 750 if (lp->log_skip == ep->ev_off) { 751 lp->log_flags |= FMD_LF_DIRTY; 752 lp->log_skip += ep->ev_len; 753 } 754 755 ASSERT(lp->log_pending != 0); 756 lp->log_pending--; 757 758 (void) pthread_cond_broadcast(&lp->log_cv); 759 (void) pthread_mutex_unlock(&lp->log_lock); 760 761 } else { 762 fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n", 763 lp->log_tag, (void *)ep, fmd_strerror(err)); 764 } 765 } 766 767 /* 768 * If we need to destroy an event and it wasn't able to be committed, we permit 769 * the owner to decommit from ever trying again. This operation decrements the 770 * pending count on the log and broadcasts to anyone waiting on log_cv. 771 */ 772 void 773 fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e) 774 { 775 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 776 777 if (!(lp->log_flags & FMD_LF_REPLAY)) 778 return; /* log does not require replay tagging */ 779 780 ASSERT(MUTEX_HELD(&ep->ev_lock)); 781 ASSERT(ep->ev_log == lp); 782 783 (void) pthread_mutex_lock(&lp->log_lock); 784 785 TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep)); 786 ep->ev_flags &= ~FMD_EVF_REPLAY; 787 788 ASSERT(lp->log_pending != 0); 789 lp->log_pending--; 790 791 (void) pthread_cond_broadcast(&lp->log_cv); 792 (void) pthread_mutex_unlock(&lp->log_lock); 793 } 794 795 static fmd_event_t * 796 fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off) 797 { 798 fmd_timeval_t ftv = { -1ULL, -1ULL }; 799 nvlist_t *nvl = NULL; 800 801 ea_object_t *obj; 802 char *class; 803 int err; 804 805 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) { 806 switch (obj->eo_catalog) { 807 case CAT_FMA_NVLIST: 808 if ((err = nvlist_xunpack(obj->eo_item.ei_raw, 809 obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) { 810 fmd_error(EFMD_LOG_UNPACK, "failed to unpack " 811 "log nvpair: %s\n", fmd_strerror(err)); 812 return (NULL); 813 } 814 break; 815 816 case CAT_FMA_TODSEC: 817 ftv.ftv_sec = obj->eo_item.ei_uint64; 818 break; 819 820 case CAT_FMA_TODNSEC: 821 ftv.ftv_nsec = obj->eo_item.ei_uint64; 822 break; 823 } 824 } 825 826 if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) { 827 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: " 828 "required object(s) missing from record group\n"); 829 nvlist_free(nvl); 830 return (NULL); 831 } 832 833 if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) { 834 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: " 835 "record is missing required '%s' nvpair\n", FM_CLASS); 836 nvlist_free(nvl); 837 return (NULL); 838 } 839 840 return (fmd_event_recreate(FMD_EVT_PROTOCOL, 841 &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0))); 842 } 843 844 /* 845 * Replay event(s) from the specified log by invoking the specified callback 846 * function 'func' for each event. If the log has the FMD_LF_REPLAY flag set, 847 * we replay all events after log_skip that have the FMA_RGROUP group tag. 848 * This mode is used for the error telemetry log. If the log does not have 849 * this flag set (used for ASRU logs), only the most recent event is replayed. 850 */ 851 void 852 fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data) 853 { 854 ea_object_t obj, *grp; 855 ea_object_type_t type; 856 ea_catalog_t c; 857 fmd_event_t *ep; 858 off64_t off, skp; 859 uint_t n = 0; 860 861 (void) pthread_mutex_lock(&lp->log_lock); 862 863 if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) { 864 (void) pthread_mutex_unlock(&lp->log_lock); 865 return; /* we just created this log: never replay events */ 866 } 867 868 while (lp->log_flags & FMD_LF_BUSY) 869 (void) pthread_cond_wait(&lp->log_cv, &lp->log_lock); 870 871 if (lp->log_off == lp->log_beg) { 872 (void) pthread_mutex_unlock(&lp->log_lock); 873 return; /* no records appended yet */ 874 } 875 876 lp->log_flags |= FMD_LF_BUSY; 877 skp = lp->log_skip; 878 ea_clear(&lp->log_ea); /* resync exacct file */ 879 880 /* 881 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it 882 * is non-zero) or at log_beg. Otherwise replay from the end (log_off) 883 */ 884 if (lp->log_flags & FMD_LF_REPLAY) { 885 off = MAX(lp->log_beg, lp->log_skip); 886 c = CAT_FMA_RGROUP; 887 } else { 888 off = lp->log_off; 889 c = CAT_FMA_GROUP; 890 } 891 892 if (lseek64(lp->log_fd, off, SEEK_SET) != off) { 893 fmd_panic("failed to seek %s to 0x%llx\n", 894 lp->log_name, (u_longlong_t)off); 895 } 896 897 /* 898 * If FMD_LF_REPLAY is not set, back up to the start of the previous 899 * object and make sure this object is an EO_GROUP; otherwise return. 900 */ 901 if (!(lp->log_flags & FMD_LF_REPLAY) && 902 (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) { 903 fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected " 904 "type %d (log may be truncated or corrupt)\n", type); 905 goto out; 906 } 907 908 while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) { 909 if (!(lp->log_flags & FMD_LF_REPLAY)) 910 off -= ea_pack_object(grp, NULL, 0); 911 else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP) 912 skp = off; /* update skip */ 913 914 /* 915 * We temporarily drop log_lock around the call to unpack the 916 * event, hold it, and perform the callback, because these 917 * operations may try to acquire log_lock to bump log_refs. 918 * We cannot lose control because the FMD_LF_BUSY flag is set. 919 */ 920 (void) pthread_mutex_unlock(&lp->log_lock); 921 922 if (grp->eo_catalog == c && 923 (ep = fmd_log_unpack(lp, grp, off)) != NULL) { 924 925 TRACE((FMD_DBG_LOG, "replay %s %p off %llx", 926 lp->log_tag, (void *)ep, (u_longlong_t)off)); 927 928 fmd_event_hold(ep); 929 func(lp, ep, data); 930 fmd_event_rele(ep); 931 n++; 932 } 933 934 (void) pthread_mutex_lock(&lp->log_lock); 935 off += ea_pack_object(grp, NULL, 0); 936 ea_free_object(grp, EUP_ALLOC); 937 } 938 939 if (ea_error() != EXR_EOF) { 940 fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at " 941 "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off, 942 fmd_ea_strerror(ea_error())); 943 } 944 945 if (n == 0) 946 skp = off; /* if no replays, move skip to where we ended up */ 947 948 out: 949 if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) { 950 fmd_panic("failed to seek %s to 0x%llx\n", 951 lp->log_name, (u_longlong_t)lp->log_off); 952 } 953 954 if (skp != lp->log_skip) { 955 lp->log_flags |= FMD_LF_DIRTY; 956 lp->log_skip = skp; 957 } 958 959 lp->log_flags &= ~FMD_LF_BUSY; 960 (void) pthread_cond_broadcast(&lp->log_cv); 961 (void) pthread_mutex_unlock(&lp->log_lock); 962 } 963 964 void 965 fmd_log_update(fmd_log_t *lp) 966 { 967 ea_object_t toc, item; 968 off64_t skip = 0; 969 size_t size; 970 void *buf; 971 972 (void) pthread_mutex_lock(&lp->log_lock); 973 974 if (lp->log_flags & FMD_LF_DIRTY) { 975 lp->log_flags &= ~FMD_LF_DIRTY; 976 skip = lp->log_skip; 977 } 978 979 (void) pthread_mutex_unlock(&lp->log_lock); 980 981 /* 982 * If the skip needs to be updated, construct a TOC record group 983 * containing the skip offset and overwrite the TOC in-place. 984 */ 985 if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 && 986 ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) { 987 988 (void) ea_attach_to_group(&toc, &item); 989 size = ea_pack_object(&toc, NULL, 0); 990 buf = fmd_alloc(size, FMD_SLEEP); 991 992 (void) ea_pack_object(&toc, buf, size); 993 ASSERT(lp->log_toc + size == lp->log_beg); 994 995 if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) { 996 TRACE((FMD_DBG_LOG, "updated skip to %llx", skip)); 997 } else { 998 fmd_error(EFMD_LOG_UPDATE, 999 "failed to log_update %s", lp->log_tag); 1000 } 1001 1002 fmd_free(buf, size); 1003 (void) ea_free_item(&item, EUP_ALLOC); 1004 } 1005 } 1006 1007 /* 1008 * Rotate the specified log by renaming its underlying file to a staging file 1009 * that can be handed off to logadm(1M) or an administrator script. If the 1010 * rename succeeds, open a new log file using the old path and return it. 1011 * Note that we are relying our caller to use some higher-level mechanism to 1012 * ensure that fmd_log_rotate() cannot be called while other threads are 1013 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used 1014 * for the global errlog and fltlog). 1015 */ 1016 fmd_log_t * 1017 fmd_log_rotate(fmd_log_t *lp) 1018 { 1019 char npath[PATH_MAX]; 1020 fmd_log_t *nlp; 1021 1022 (void) snprintf(npath, sizeof (npath), "%s+", lp->log_name); 1023 1024 /* 1025 * Open new log file. 1026 */ 1027 if ((nlp = fmd_log_open("", npath, lp->log_tag)) == NULL) { 1028 fmd_error(EFMD_LOG_ROTATE, "failed to open %s", npath); 1029 (void) fmd_set_errno(EFMD_LOG_ROTATE); 1030 return (NULL); 1031 } 1032 1033 (void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name); 1034 (void) pthread_mutex_lock(&lp->log_lock); 1035 1036 /* 1037 * Check for any pending commits to drain before proceeding. We can't 1038 * rotate the log out if commits are pending because if we die after 1039 * the log is moved aside, we won't be able to replay them on restart. 1040 */ 1041 if (lp->log_pending != 0) { 1042 (void) pthread_mutex_unlock(&lp->log_lock); 1043 (void) unlink(nlp->log_name); 1044 fmd_log_rele(nlp); 1045 (void) fmd_set_errno(EFMD_LOG_ROTBUSY); 1046 return (NULL); 1047 } 1048 1049 if (rename(lp->log_name, npath) != 0) { 1050 (void) pthread_mutex_unlock(&lp->log_lock); 1051 fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name); 1052 (void) unlink(nlp->log_name); 1053 fmd_log_rele(nlp); 1054 (void) fmd_set_errno(EFMD_LOG_ROTATE); 1055 return (NULL); 1056 } 1057 1058 if (rename(nlp->log_name, lp->log_name) != 0) { 1059 (void) pthread_mutex_unlock(&lp->log_lock); 1060 fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", 1061 nlp->log_name); 1062 (void) unlink(nlp->log_name); 1063 fmd_log_rele(nlp); 1064 (void) fmd_set_errno(EFMD_LOG_ROTATE); 1065 return (NULL); 1066 } 1067 1068 /* 1069 * Change name of new log file 1070 */ 1071 fmd_strfree(nlp->log_name); 1072 nlp->log_name = fmd_strdup(lp->log_name, FMD_SLEEP); 1073 1074 /* 1075 * If we've rotated the log, no pending events exist so we don't have 1076 * any more commits coming, and our caller should have arranged for 1077 * no more calls to append. As such, we can close log_fd for good. 1078 */ 1079 if (lp->log_flags & FMD_LF_EAOPEN) { 1080 (void) ea_close(&lp->log_ea); 1081 lp->log_flags &= ~FMD_LF_EAOPEN; 1082 } 1083 1084 (void) close(lp->log_fd); 1085 lp->log_fd = -1; 1086 1087 (void) pthread_mutex_unlock(&lp->log_lock); 1088 return (nlp); 1089 } 1090