1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * FMD Log File Subsystem 31 * 32 * Events are written to one of two log files as they are received or created; 33 * the error log tracks all ereport.* events received on the inbound event 34 * transport, and the fault log tracks all list.* events generated by fmd or 35 * its client modules. In addition, we use the same log file format to cache 36 * state and events associated with ASRUs that are named in a diagnosis. 37 * 38 * The log files use the exacct format manipulated by libexacct(3LIB) and 39 * originally defined in PSARC 1999/119. However, the exacct library was 40 * designed primarily for read-only clients and without the synchronous i/o 41 * considerations and seeking required for fmd, so we use libexacct here only 42 * to read and write the file headers and to pack data from memory into a file 43 * bytestream. All of the i/o and file offset manipulations are performed by 44 * the fmd code below. Our exacct file management uses the following grammar: 45 * 46 * file := hdr toc event* 47 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER EXD_FMA_PLAT 48 * toc := EXD_FMA_OFFSET 49 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* 50 * evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET 51 * 52 * Any event can be uniquely identified by the tuple (file, offset) where file 53 * is encoded as (major, minor, inode) when we are cross-linking files. Note 54 * that we break out of the file's dev_t into its two 32-bit components to 55 * permit development of either 32-bit or 64-bit log readers and writers; the 56 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way 57 * for a 32-bit application to retrieve and store a 64-bit dev_t. 58 * 59 * In order to replay events in the event of an fmd crash, events are initially 60 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the 61 * fmd_log_append() function. Later, once an event transitions from the 62 * received state to one of its other states (see fmd_event.c for details), 63 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating 64 * that the event is fully processed and no longer needs to be replayed. 65 */ 66 67 #include <sys/types.h> 68 #include <sys/mkdev.h> 69 #include <sys/statvfs.h> 70 #include <sys/fm/protocol.h> 71 #include <sys/exacct_impl.h> 72 73 #include <unistd.h> 74 #include <limits.h> 75 #include <fcntl.h> 76 #include <ctype.h> 77 78 #include <fmd_alloc.h> 79 #include <fmd_error.h> 80 #include <fmd_string.h> 81 #include <fmd_event.h> 82 #include <fmd_conf.h> 83 #include <fmd_subr.h> 84 #include <fmd_case.h> 85 #include <fmd_log.h> 86 87 #include <fmd.h> 88 89 #define CAT_FMA_RGROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA) 90 #define CAT_FMA_GROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA) 91 92 #define CAT_FMA_LABEL (EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL) 93 #define CAT_FMA_VERSION (EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION) 94 #define CAT_FMA_OSREL (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL) 95 #define CAT_FMA_OSVER (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER) 96 #define CAT_FMA_PLAT (EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT) 97 #define CAT_FMA_TODSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC) 98 #define CAT_FMA_TODNSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC) 99 #define CAT_FMA_NVLIST (EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST) 100 #define CAT_FMA_MAJOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR) 101 #define CAT_FMA_MINOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR) 102 #define CAT_FMA_INODE (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE) 103 #define CAT_FMA_OFFSET (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET) 104 105 static ssize_t 106 fmd_log_write(fmd_log_t *lp, const void *buf, size_t n) 107 { 108 ssize_t resid = n; 109 ssize_t len; 110 111 ASSERT(MUTEX_HELD(&lp->log_lock)); 112 113 while (resid != 0) { 114 if ((len = write(lp->log_fd, buf, resid)) <= 0) 115 break; 116 117 resid -= len; 118 buf = (char *)buf + len; 119 } 120 121 if (resid == n && n != 0) 122 return (-1); 123 124 return (n - resid); 125 } 126 127 static int 128 fmd_log_write_hdr(fmd_log_t *lp, const char *tag) 129 { 130 ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5; 131 const char *osrel, *osver, *plat; 132 off64_t off = 0; 133 int err = 0; 134 135 (void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel); 136 (void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver); 137 (void) fmd_conf_getprop(fmd.d_conf, "platform", &plat); 138 139 err |= ea_set_group(&hdr, CAT_FMA_GROUP); 140 err |= ea_set_group(&toc, CAT_FMA_GROUP); 141 142 err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0); 143 err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0); 144 err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0); 145 err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0); 146 err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0); 147 err |= ea_set_item(&i5, CAT_FMA_OFFSET, &off, 0); 148 149 (void) ea_attach_to_group(&hdr, &i0); 150 (void) ea_attach_to_group(&hdr, &i1); 151 (void) ea_attach_to_group(&hdr, &i2); 152 (void) ea_attach_to_group(&hdr, &i3); 153 (void) ea_attach_to_group(&hdr, &i4); 154 (void) ea_attach_to_group(&toc, &i5); 155 156 if (err == 0) { 157 size_t hdr_size = ea_pack_object(&hdr, NULL, 0); 158 size_t toc_size = ea_pack_object(&toc, NULL, 0); 159 160 size_t size = hdr_size + toc_size; 161 void *buf = fmd_alloc(size, FMD_SLEEP); 162 163 (void) ea_pack_object(&hdr, buf, hdr_size); 164 (void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size); 165 166 if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L) 167 fmd_panic("failed to seek log %s", lp->log_name); 168 169 if (fmd_log_write(lp, buf, size) != size) 170 err = errno; /* save errno for fmd_set_errno() below */ 171 172 fmd_free(buf, size); 173 174 lp->log_toc = lp->log_off + hdr_size; 175 lp->log_beg = lp->log_off + hdr_size + toc_size; 176 lp->log_off = lp->log_off + hdr_size + toc_size; 177 178 if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END)) 179 fmd_panic("eof off != log_off 0x%llx\n", lp->log_off); 180 } else 181 err = EFMD_LOG_EXACCT; 182 183 (void) ea_free_item(&i0, EUP_ALLOC); 184 (void) ea_free_item(&i1, EUP_ALLOC); 185 (void) ea_free_item(&i2, EUP_ALLOC); 186 (void) ea_free_item(&i3, EUP_ALLOC); 187 (void) ea_free_item(&i4, EUP_ALLOC); 188 (void) ea_free_item(&i5, EUP_ALLOC); 189 190 return (err ? fmd_set_errno(err) : 0); 191 } 192 193 static int 194 fmd_log_check_err(fmd_log_t *lp, int err, const char *msg) 195 { 196 int eaerr = ea_error(); 197 char buf[BUFSIZ]; 198 199 (void) snprintf(buf, sizeof (buf), "%s: %s: %s\n", 200 lp->log_name, msg, eaerr != EXR_OK ? 201 fmd_ea_strerror(eaerr) : "catalog tag mismatch"); 202 203 fmd_error(err, buf); 204 return (fmd_set_errno(err)); 205 } 206 207 static int 208 fmd_log_check_hdr(fmd_log_t *lp, const char *tag) 209 { 210 int got_version = 0, got_label = 0; 211 ea_object_t *grp, *obj; 212 off64_t hdr_off, hdr_size; 213 int dvers, fvers; 214 const char *p; 215 216 ea_clear(&lp->log_ea); /* resync exacct file */ 217 218 if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L) 219 fmd_panic("failed to seek log %s", lp->log_name); 220 221 /* 222 * Read the first group of log meta-data: the write-once read-only 223 * file header. We read all records in this group, ignoring all but 224 * the VERSION and LABEL, which are required and must be verified. 225 */ 226 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL || 227 grp->eo_catalog != CAT_FMA_GROUP) { 228 ea_free_object(grp, EUP_ALLOC); 229 return (fmd_log_check_err(lp, EFMD_LOG_INVAL, 230 "invalid fma hdr record group")); 231 } 232 233 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) { 234 switch (obj->eo_catalog) { 235 case CAT_FMA_VERSION: 236 for (dvers = 0, p = fmd.d_version; 237 *p != '\0'; p++) { 238 if (isdigit(*p)) 239 dvers = dvers * 10 + (*p - '0'); 240 else 241 break; 242 } 243 244 for (fvers = 0, p = obj->eo_item.ei_string; 245 *p != '\0'; p++) { 246 if (isdigit(*p)) 247 fvers = fvers * 10 + (*p - '0'); 248 else 249 break; 250 } 251 252 if (fvers > dvers) { 253 fmd_error(EFMD_LOG_INVAL, "%s: log version " 254 "%s is not supported by this daemon\n", 255 lp->log_name, obj->eo_item.ei_string); 256 ea_free_object(grp, EUP_ALLOC); 257 return (fmd_set_errno(EFMD_LOG_VERSION)); 258 } 259 260 got_version++; 261 break; 262 263 case CAT_FMA_LABEL: 264 if (strcmp(obj->eo_item.ei_string, tag) != 0) { 265 fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' " 266 "does not matched expected tag '%s'\n", 267 lp->log_name, obj->eo_item.ei_string, tag); 268 ea_free_object(grp, EUP_ALLOC); 269 return (fmd_set_errno(EFMD_LOG_INVAL)); 270 } 271 got_label++; 272 break; 273 } 274 } 275 276 hdr_size = ea_pack_object(grp, NULL, 0); 277 ea_free_object(grp, EUP_ALLOC); 278 279 if (!got_version || !got_label) { 280 fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not " 281 "include mandatory version and/or label\n", lp->log_name); 282 return (fmd_set_errno(EFMD_LOG_INVAL)); 283 } 284 285 /* 286 * Read the second group of log meta-data: the table of contents. We 287 * expect this group to contain an OFFSET object indicating the current 288 * value of log_skip. We save this in our fmd_log_t and then return. 289 */ 290 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL || 291 grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 || 292 grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) { 293 ea_free_object(grp, EUP_ALLOC); 294 return (fmd_log_check_err(lp, EFMD_LOG_INVAL, 295 "invalid fma toc record group")); 296 } 297 298 lp->log_toc = hdr_off + hdr_size; 299 lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0); 300 lp->log_off = lseek64(lp->log_fd, 0, SEEK_END); 301 lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64; 302 303 if (lp->log_skip > lp->log_off) { 304 fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; " 305 "resetting to zero\n", lp->log_name, lp->log_skip); 306 lp->log_skip = 0; 307 } 308 309 ea_free_object(grp, EUP_ALLOC); 310 return (0); 311 } 312 313 static int 314 fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags) 315 { 316 int fd = dup(lp->log_fd); 317 const char *creator; 318 319 (void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator); 320 321 if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) { 322 fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n", 323 lp->log_name, fmd_ea_strerror(ea_error())); 324 (void) close(fd); 325 return (fmd_set_errno(EFMD_LOG_EXACCT)); 326 } 327 328 lp->log_flags |= FMD_LF_EAOPEN; 329 return (0); 330 } 331 332 static fmd_log_t * 333 fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags) 334 { 335 fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP); 336 337 char buf[PATH_MAX]; 338 size_t len; 339 int err; 340 341 (void) pthread_mutex_init(&lp->log_lock, NULL); 342 (void) pthread_cond_init(&lp->log_cv, NULL); 343 (void) pthread_mutex_lock(&lp->log_lock); 344 345 len = strlen(root) + strlen(name) + 2; /* for "/" and "\0" */ 346 lp->log_name = fmd_alloc(len, FMD_SLEEP); 347 (void) snprintf(lp->log_name, len, "%s/%s", root, name); 348 lp->log_tag = fmd_strdup(tag, FMD_SLEEP); 349 (void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree); 350 351 if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0) 352 lp->log_flags |= FMD_LF_REPLAY; 353 354 top: 355 if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 || 356 fstat64(lp->log_fd, &lp->log_stat) == -1) { 357 fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name); 358 fmd_log_close(lp); 359 return (NULL); 360 } 361 362 /* 363 * If our open() created the log file, use libexacct to write a header 364 * and position the file just after the header (EO_TAIL). If the log 365 * file already existed, use libexacct to validate the header and again 366 * position the file just after the header (EO_HEAD). Note that we lie 367 * to libexacct about 'oflags' in order to achieve the desired result. 368 */ 369 if (lp->log_stat.st_size == 0) { 370 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL, 371 O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag); 372 } else { 373 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD, 374 O_RDONLY) || fmd_log_check_hdr(lp, tag); 375 } 376 377 /* 378 * If ea_fdopen() failed and the log was pre-existing, attempt to move 379 * it aside and start a new one. If we created the log but failed to 380 * initialize it, then we have no choice but to give up (e.g. EROFS). 381 */ 382 if (err) { 383 fmd_error(EFMD_LOG_OPEN, 384 "failed to initialize log %s", lp->log_name); 385 386 if (lp->log_flags & FMD_LF_EAOPEN) { 387 lp->log_flags &= ~FMD_LF_EAOPEN; 388 (void) ea_close(&lp->log_ea); 389 } 390 391 (void) close(lp->log_fd); 392 lp->log_fd = -1; 393 394 if (lp->log_stat.st_size != 0 && snprintf(buf, 395 sizeof (buf), "%s-", lp->log_name) < PATH_MAX && 396 rename(lp->log_name, buf) == 0) { 397 TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf)); 398 if (oflags & O_CREAT) 399 goto top; 400 } 401 402 fmd_log_close(lp); 403 return (NULL); 404 } 405 406 lp->log_refs++; 407 (void) pthread_mutex_unlock(&lp->log_lock); 408 409 return (lp); 410 } 411 412 fmd_log_t * 413 fmd_log_tryopen(const char *root, const char *name, const char *tag) 414 { 415 return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC)); 416 } 417 418 fmd_log_t * 419 fmd_log_open(const char *root, const char *name, const char *tag) 420 { 421 return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC)); 422 } 423 424 void 425 fmd_log_close(fmd_log_t *lp) 426 { 427 ASSERT(MUTEX_HELD(&lp->log_lock)); 428 ASSERT(lp->log_refs == 0); 429 430 if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) { 431 fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n", 432 lp->log_name, fmd_ea_strerror(ea_error())); 433 } 434 435 if (lp->log_fd >= 0 && close(lp->log_fd) != 0) { 436 fmd_error(EFMD_LOG_CLOSE, 437 "failed to close log %s", lp->log_name); 438 } 439 440 fmd_strfree(lp->log_name); 441 fmd_strfree(lp->log_tag); 442 443 fmd_free(lp, sizeof (fmd_log_t)); 444 } 445 446 void 447 fmd_log_hold_pending(fmd_log_t *lp) 448 { 449 (void) pthread_mutex_lock(&lp->log_lock); 450 451 lp->log_refs++; 452 ASSERT(lp->log_refs != 0); 453 454 if (lp->log_flags & FMD_LF_REPLAY) { 455 lp->log_pending++; 456 ASSERT(lp->log_pending != 0); 457 } 458 459 (void) pthread_mutex_unlock(&lp->log_lock); 460 } 461 462 void 463 fmd_log_hold(fmd_log_t *lp) 464 { 465 (void) pthread_mutex_lock(&lp->log_lock); 466 lp->log_refs++; 467 ASSERT(lp->log_refs != 0); 468 (void) pthread_mutex_unlock(&lp->log_lock); 469 } 470 471 void 472 fmd_log_rele(fmd_log_t *lp) 473 { 474 (void) pthread_mutex_lock(&lp->log_lock); 475 ASSERT(lp->log_refs != 0); 476 477 if (--lp->log_refs == 0) 478 fmd_log_close(lp); 479 else 480 (void) pthread_mutex_unlock(&lp->log_lock); 481 } 482 483 void 484 fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp) 485 { 486 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 487 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 488 int err = 0; 489 490 ea_object_t grp0, grp1, i0, i1, i2, *items; 491 size_t nvsize, easize, itsize; 492 char *nvbuf, *eabuf; 493 statvfs64_t stv; 494 495 (void) pthread_mutex_lock(&ep->ev_lock); 496 497 ASSERT(ep->ev_flags & FMD_EVF_VOLATILE); 498 ASSERT(ep->ev_log == NULL); 499 500 (void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR); 501 nvbuf = fmd_alloc(nvsize, FMD_SLEEP); 502 (void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0); 503 504 if (lp->log_flags & FMD_LF_REPLAY) 505 err |= ea_set_group(&grp0, CAT_FMA_RGROUP); 506 else 507 err |= ea_set_group(&grp0, CAT_FMA_GROUP); 508 509 err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0); 510 err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0); 511 err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize); 512 513 if (err != 0) { 514 (void) pthread_mutex_unlock(&ep->ev_lock); 515 err = EFMD_LOG_EXACCT; 516 goto exerr; 517 } 518 519 (void) ea_attach_to_group(&grp0, &i0); 520 (void) ea_attach_to_group(&grp0, &i1); 521 (void) ea_attach_to_group(&grp0, &i2); 522 523 /* 524 * If this event has a case associated with it (i.e. it is a list), 525 * then allocate a block of ea_object_t's and fill in a group for 526 * each event saved in the case's item list. For each such group, 527 * we attach it to grp1, which in turn will be attached to grp0. 528 * This section of code cannot fail as we only manipulate integer 529 * objects, which require no underlying libexacct memory allocation. 530 */ 531 if (cp != NULL) { 532 ea_object_t *egrp, *ip; 533 fmd_event_impl_t *eip; 534 fmd_case_item_t *cit; 535 536 (void) ea_set_group(&grp1, CAT_FMA_GROUP); 537 itsize = sizeof (ea_object_t) * cip->ci_nitems * 5; 538 items = ip = fmd_alloc(itsize, FMD_SLEEP); 539 540 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 541 major_t maj; 542 minor_t min; 543 544 eip = (fmd_event_impl_t *)cit->cit_event; 545 546 if (eip->ev_log == NULL) 547 continue; /* event was never logged */ 548 549 maj = major(eip->ev_log->log_stat.st_dev); 550 min = minor(eip->ev_log->log_stat.st_dev); 551 552 (void) ea_set_group(ip, CAT_FMA_GROUP); 553 egrp = ip++; /* first obj is group */ 554 555 (void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0); 556 (void) ea_attach_to_group(egrp, ip++); 557 558 (void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0); 559 (void) ea_attach_to_group(egrp, ip++); 560 561 (void) ea_set_item(ip, CAT_FMA_INODE, 562 &eip->ev_log->log_stat.st_ino, 0); 563 (void) ea_attach_to_group(egrp, ip++); 564 565 (void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0); 566 (void) ea_attach_to_group(egrp, ip++); 567 568 (void) ea_attach_to_group(&grp1, egrp); 569 } 570 571 (void) ea_attach_to_group(&grp0, &grp1); 572 } 573 574 easize = ea_pack_object(&grp0, NULL, 0); 575 eabuf = fmd_alloc(easize, FMD_SLEEP); 576 (void) ea_pack_object(&grp0, eabuf, easize); 577 578 /* 579 * Before writing the record, check to see if this would cause the free 580 * space in the filesystem to drop below our minfree threshold. If so, 581 * don't bother attempting the write and instead pretend it failed. As 582 * fmd(1M) runs as root, it will be able to access the space "reserved" 583 * for root, and therefore can run the system of out of disk space in a 584 * heavy error load situation, violating the basic design principle of 585 * fmd(1M) that we don't want to make a bad situation even worse. 586 */ 587 (void) pthread_mutex_lock(&lp->log_lock); 588 589 if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 && 590 stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) { 591 592 TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag)); 593 err = EFMD_LOG_MINFREE; 594 595 } else if (fmd_log_write(lp, eabuf, easize) == easize) { 596 TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx", 597 lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off)); 598 599 ep->ev_flags &= ~FMD_EVF_VOLATILE; 600 ep->ev_log = lp; 601 ep->ev_off = lp->log_off; 602 ep->ev_len = easize; 603 604 if (lp->log_flags & FMD_LF_REPLAY) { 605 lp->log_pending++; 606 ASSERT(lp->log_pending != 0); 607 } 608 609 lp->log_refs++; 610 ASSERT(lp->log_refs != 0); 611 lp->log_off += easize; 612 } else { 613 err = errno; /* save errno for fmd_error() call below */ 614 615 /* 616 * If we can't write append the record, seek the file back to 617 * the original location and truncate it there in order to make 618 * sure the file is always in a sane state w.r.t. libexacct. 619 */ 620 (void) lseek64(lp->log_fd, lp->log_off, SEEK_SET); 621 (void) ftruncate64(lp->log_fd, lp->log_off); 622 } 623 624 (void) pthread_mutex_unlock(&lp->log_lock); 625 (void) pthread_mutex_unlock(&ep->ev_lock); 626 627 if (cp != NULL) 628 fmd_free(items, itsize); 629 630 fmd_free(eabuf, easize); 631 exerr: 632 fmd_free(nvbuf, nvsize); 633 634 (void) ea_free_item(&i0, EUP_ALLOC); 635 (void) ea_free_item(&i1, EUP_ALLOC); 636 (void) ea_free_item(&i2, EUP_ALLOC); 637 638 /* 639 * Keep track of out-of-space errors using global statistics. As we're 640 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged. 641 */ 642 if (err == ENOSPC || err == EFMD_LOG_MINFREE) { 643 fmd_stat_t *sp; 644 645 if (lp == fmd.d_errlog) 646 sp = &fmd.d_stats->ds_err_enospc; 647 else if (lp == fmd.d_fltlog) 648 sp = &fmd.d_stats->ds_flt_enospc; 649 else 650 sp = &fmd.d_stats->ds_oth_enospc; 651 652 (void) pthread_mutex_lock(&fmd.d_stats_lock); 653 sp->fmds_value.ui64++; 654 (void) pthread_mutex_unlock(&fmd.d_stats_lock); 655 } 656 657 if (err != 0) { 658 fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n", 659 lp->log_tag, (void *)ep, fmd_strerror(err)); 660 } 661 } 662 663 /* 664 * Commit an event to the log permanently, indicating that it should not be 665 * replayed on restart. This is done by overwriting the event group's catalog 666 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()). We 667 * use pwrite64() to update the existing word directly, using somewhat guilty 668 * knowledge that exacct stores the 32-bit catalog word first for each object. 669 * Since we are overwriting an existing log location using pwrite64() and hold 670 * the event lock, we do not need to hold the log_lock during the i/o. 671 */ 672 void 673 fmd_log_commit(fmd_log_t *lp, fmd_event_t *e) 674 { 675 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 676 ea_catalog_t c; 677 int err = 0; 678 679 if (!(lp->log_flags & FMD_LF_REPLAY)) 680 return; /* log does not require replay tagging */ 681 682 ASSERT(MUTEX_HELD(&ep->ev_lock)); 683 ASSERT(ep->ev_log == lp && ep->ev_off != 0); 684 685 c = CAT_FMA_GROUP; 686 exacct_order32(&c); 687 688 if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) { 689 TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep)); 690 ep->ev_flags &= ~FMD_EVF_REPLAY; 691 692 /* 693 * If we have committed the event, check to see if the TOC skip 694 * offset needs to be updated, and decrement the pending count. 695 */ 696 (void) pthread_mutex_lock(&lp->log_lock); 697 698 if (lp->log_skip == ep->ev_off) { 699 lp->log_flags |= FMD_LF_DIRTY; 700 lp->log_skip += ep->ev_len; 701 } 702 703 ASSERT(lp->log_pending != 0); 704 lp->log_pending--; 705 706 (void) pthread_mutex_unlock(&lp->log_lock); 707 (void) pthread_cond_broadcast(&lp->log_cv); 708 709 } else { 710 fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n", 711 lp->log_tag, (void *)ep, fmd_strerror(err)); 712 } 713 } 714 715 /* 716 * If we need to destroy an event and it wasn't able to be committed, we permit 717 * the owner to decommit from ever trying again. This operation decrements the 718 * pending count on the log and broadcasts to anyone waiting on log_cv. 719 */ 720 void 721 fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e) 722 { 723 fmd_event_impl_t *ep = (fmd_event_impl_t *)e; 724 725 if (!(lp->log_flags & FMD_LF_REPLAY)) 726 return; /* log does not require replay tagging */ 727 728 ASSERT(MUTEX_HELD(&ep->ev_lock)); 729 ASSERT(ep->ev_log == lp); 730 731 (void) pthread_mutex_lock(&lp->log_lock); 732 733 TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep)); 734 ep->ev_flags &= ~FMD_EVF_REPLAY; 735 736 ASSERT(lp->log_pending != 0); 737 lp->log_pending--; 738 739 (void) pthread_mutex_unlock(&lp->log_lock); 740 (void) pthread_cond_broadcast(&lp->log_cv); 741 } 742 743 static fmd_event_t * 744 fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off) 745 { 746 fmd_timeval_t ftv = { -1ULL, -1ULL }; 747 nvlist_t *nvl = NULL; 748 749 ea_object_t *obj; 750 char *class; 751 int err; 752 753 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) { 754 switch (obj->eo_catalog) { 755 case CAT_FMA_NVLIST: 756 if ((err = nvlist_xunpack(obj->eo_item.ei_raw, 757 obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) { 758 fmd_error(EFMD_LOG_UNPACK, "failed to unpack " 759 "log nvpair: %s\n", fmd_strerror(err)); 760 return (NULL); 761 } 762 break; 763 764 case CAT_FMA_TODSEC: 765 ftv.ftv_sec = obj->eo_item.ei_uint64; 766 break; 767 768 case CAT_FMA_TODNSEC: 769 ftv.ftv_nsec = obj->eo_item.ei_uint64; 770 break; 771 } 772 } 773 774 if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) { 775 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: " 776 "required object(s) missing from record group\n"); 777 nvlist_free(nvl); 778 return (NULL); 779 } 780 781 if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) { 782 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: " 783 "record is missing required '%s' nvpair\n", FM_CLASS); 784 nvlist_free(nvl); 785 return (NULL); 786 } 787 788 return (fmd_event_recreate(FMD_EVT_PROTOCOL, 789 &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0))); 790 } 791 792 /* 793 * Replay event(s) from the specified log by invoking the specified callback 794 * function 'func' for each event. If the log has the FMD_LF_REPLAY flag set, 795 * we replay all events after log_skip that have the FMA_RGROUP group tag. 796 * This mode is used for the error telemetry log. If the log does not have 797 * this flag set (used for ASRU logs), only the most recent event is replayed. 798 */ 799 void 800 fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data) 801 { 802 ea_object_t obj, *grp; 803 ea_object_type_t type; 804 ea_catalog_t c; 805 fmd_event_t *ep; 806 off64_t off, skp; 807 uint_t n = 0; 808 809 (void) pthread_mutex_lock(&lp->log_lock); 810 811 if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) { 812 (void) pthread_mutex_unlock(&lp->log_lock); 813 return; /* we just created this log: never replay events */ 814 } 815 816 while (lp->log_flags & FMD_LF_BUSY) 817 (void) pthread_cond_wait(&lp->log_cv, &lp->log_lock); 818 819 if (lp->log_off == lp->log_beg) { 820 (void) pthread_mutex_unlock(&lp->log_lock); 821 return; /* no records appended yet */ 822 } 823 824 lp->log_flags |= FMD_LF_BUSY; 825 skp = lp->log_skip; 826 ea_clear(&lp->log_ea); /* resync exacct file */ 827 828 /* 829 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it 830 * is non-zero) or at log_beg. Otherwise replay from the end (log_off) 831 */ 832 if (lp->log_flags & FMD_LF_REPLAY) { 833 off = MAX(lp->log_beg, lp->log_skip); 834 c = CAT_FMA_RGROUP; 835 } else { 836 off = lp->log_off; 837 c = CAT_FMA_GROUP; 838 } 839 840 if (lseek64(lp->log_fd, off, SEEK_SET) != off) { 841 fmd_panic("failed to seek %s to 0x%llx\n", 842 lp->log_name, (u_longlong_t)off); 843 } 844 845 /* 846 * If FMD_LF_REPLAY is not set, back up to the start of the previous 847 * object and make sure this object is an EO_GROUP; otherwise return. 848 */ 849 if (!(lp->log_flags & FMD_LF_REPLAY) && 850 (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) { 851 fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected " 852 "type %d (log may be truncated or corrupt)\n", type); 853 goto out; 854 } 855 856 while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) { 857 if (!(lp->log_flags & FMD_LF_REPLAY)) 858 off -= ea_pack_object(grp, NULL, 0); 859 else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP) 860 skp = off; /* update skip */ 861 862 /* 863 * We temporarily drop log_lock around the call to unpack the 864 * event, hold it, and perform the callback, because these 865 * operations may try to acquire log_lock to bump log_refs. 866 * We cannot lose control because the FMD_LF_BUSY flag is set. 867 */ 868 (void) pthread_mutex_unlock(&lp->log_lock); 869 870 if (grp->eo_catalog == c && 871 (ep = fmd_log_unpack(lp, grp, off)) != NULL) { 872 873 TRACE((FMD_DBG_LOG, "replay %s %p off %llx", 874 lp->log_tag, (void *)ep, (u_longlong_t)off)); 875 876 fmd_event_hold(ep); 877 func(lp, ep, data); 878 fmd_event_rele(ep); 879 n++; 880 } 881 882 (void) pthread_mutex_lock(&lp->log_lock); 883 off += ea_pack_object(grp, NULL, 0); 884 ea_free_object(grp, EUP_ALLOC); 885 } 886 887 if (ea_error() != EXR_EOF) { 888 fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at " 889 "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off, 890 fmd_ea_strerror(ea_error())); 891 } 892 893 if (n == 0) 894 skp = off; /* if no replays, move skip to where we ended up */ 895 896 out: 897 if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) { 898 fmd_panic("failed to seek %s to 0x%llx\n", 899 lp->log_name, (u_longlong_t)lp->log_off); 900 } 901 902 if (skp != lp->log_skip) { 903 lp->log_flags |= FMD_LF_DIRTY; 904 lp->log_skip = skp; 905 } 906 907 lp->log_flags &= ~FMD_LF_BUSY; 908 (void) pthread_mutex_unlock(&lp->log_lock); 909 (void) pthread_cond_broadcast(&lp->log_cv); 910 } 911 912 void 913 fmd_log_update(fmd_log_t *lp) 914 { 915 ea_object_t toc, item; 916 off64_t skip = 0; 917 size_t size; 918 void *buf; 919 920 (void) pthread_mutex_lock(&lp->log_lock); 921 922 if (lp->log_flags & FMD_LF_DIRTY) { 923 lp->log_flags &= ~FMD_LF_DIRTY; 924 skip = lp->log_skip; 925 } 926 927 (void) pthread_mutex_unlock(&lp->log_lock); 928 929 /* 930 * If the skip needs to be updated, construct a TOC record group 931 * containing the skip offset and overwrite the TOC in-place. 932 */ 933 if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 && 934 ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) { 935 936 (void) ea_attach_to_group(&toc, &item); 937 size = ea_pack_object(&toc, NULL, 0); 938 buf = fmd_alloc(size, FMD_SLEEP); 939 940 (void) ea_pack_object(&toc, buf, size); 941 ASSERT(lp->log_toc + size == lp->log_beg); 942 943 if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) { 944 TRACE((FMD_DBG_LOG, "updated skip to %llx", skip)); 945 } else { 946 fmd_error(EFMD_LOG_UPDATE, 947 "failed to log_update %s", lp->log_tag); 948 } 949 950 fmd_free(buf, size); 951 (void) ea_free_item(&item, EUP_ALLOC); 952 } 953 } 954 955 /* 956 * Rotate the specified log by renaming its underlying file to a staging file 957 * that can be handed off to logadm(1M) or an administrator script. If the 958 * rename succeeds, open a new log file using the old path and return it. 959 * Note that we are relying our caller to use some higher-level mechanism to 960 * ensure that fmd_log_rotate() cannot be called while other threads are 961 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used 962 * for the global errlog and fltlog). 963 */ 964 fmd_log_t * 965 fmd_log_rotate(fmd_log_t *lp) 966 { 967 char npath[PATH_MAX]; 968 fmd_log_t *nlp; 969 970 (void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name); 971 (void) pthread_mutex_lock(&lp->log_lock); 972 973 /* 974 * Check for any pending commits to drain before proceeding. We can't 975 * rotate the log out if commits are pending because if we die after 976 * the log is moved aside, we won't be able to replay them on restart. 977 */ 978 if (lp->log_pending != 0) { 979 (void) pthread_mutex_unlock(&lp->log_lock); 980 (void) fmd_set_errno(EFMD_LOG_ROTBUSY); 981 return (NULL); 982 } 983 984 if (rename(lp->log_name, npath) != 0) { 985 (void) pthread_mutex_unlock(&lp->log_lock); 986 fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name); 987 (void) fmd_set_errno(EFMD_LOG_ROTATE); 988 return (NULL); 989 } 990 991 if ((nlp = fmd_log_open("", lp->log_name, lp->log_tag)) == NULL) { 992 (void) rename(npath, lp->log_name); 993 (void) pthread_mutex_unlock(&lp->log_lock); 994 fmd_error(EFMD_LOG_ROTATE, "failed to reopen %s", lp->log_name); 995 (void) fmd_set_errno(EFMD_LOG_ROTATE); 996 return (NULL); 997 } 998 999 /* 1000 * If we've rotated the log, no pending events exist so we don't have 1001 * any more commits coming, and our caller should have arranged for 1002 * no more calls to append. As such, we can close log_fd for good. 1003 */ 1004 if (lp->log_flags & FMD_LF_EAOPEN) { 1005 (void) ea_close(&lp->log_ea); 1006 lp->log_flags &= ~FMD_LF_EAOPEN; 1007 } 1008 1009 (void) close(lp->log_fd); 1010 lp->log_fd = -1; 1011 1012 (void) pthread_mutex_unlock(&lp->log_lock); 1013 return (nlp); 1014 } 1015