1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 /*
30 * FMD Log File Subsystem
31 *
32 * Events are written to one of two log files as they are received or created;
33 * the error log tracks all ereport.* events received on the inbound event
34 * transport, and the fault log tracks all list.* events generated by fmd or
35 * its client modules. In addition, we use the same log file format to cache
36 * state and events associated with ASRUs that are named in a diagnosis.
37 *
38 * The log files use the exacct format manipulated by libexacct(3LIB) and
39 * originally defined in PSARC 1999/119. However, the exacct library was
40 * designed primarily for read-only clients and without the synchronous i/o
41 * considerations and seeking required for fmd, so we use libexacct here only
42 * to read and write the file headers and to pack data from memory into a file
43 * bytestream. All of the i/o and file offset manipulations are performed by
44 * the fmd code below. Our exacct file management uses the following grammar:
45 *
46 * file := hdr toc event*
47 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER
48 * EXD_FMA_PLAT EXD_FMA_UUID
49 * toc := EXD_FMA_OFFSET
50 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* or legacy evref
51 * evref := EXD_FMA_UUID EXD_FMA_OFFSET
52 * legacy evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET
53 *
54 * Any event can be uniquely identified by the tuple (file, offset) where file
55 * is encoded as (uuid) when we are cross-linking files. For legacy file
56 * formats we still support encoding the reference as (major, minor, inode).
57 * Note that we break out the file's dev_t into its two 32-bit components to
58 * permit development of either 32-bit or 64-bit log readers and writers; the
59 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way
60 * for a 32-bit application to retrieve and store a 64-bit dev_t.
61 *
62 * In order to replay events in the event of an fmd crash, events are initially
63 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the
64 * fmd_log_append() function. Later, once an event transitions from the
65 * received state to one of its other states (see fmd_event.c for details),
66 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating
67 * that the event is fully processed and no longer needs to be replayed.
68 */
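
/*
 * For illustration only (not part of fmd or fmdump): the grammar above means
 * a stand-alone reader needs nothing more than libexacct(3LIB) and
 * libnvpair(3LIB) to walk a log.  The sketch below opens a log (e.g. the
 * default error log at /var/fm/fmd/errlog), discards the first two record
 * groups (hdr and toc), and then prints the packed nvlist from each event
 * group.  It assumes the CAT_FMA_* composite tags defined later in this file
 * (or the underlying EXD_FMA_* ids from fmd_log.h), ignores the distinction
 * between committed (FMA) and replay-pending (RFMA) groups, and omits most
 * error handling:
 *
 *	ea_file_t ef;
 *	ea_object_t *grp, *obj;
 *	nvlist_t *nvl;
 *	int i;
 *
 *	if (ea_open(&ef, "/var/fm/fmd/errlog", NULL,
 *	    EO_VALID_HDR | EO_HEAD, O_RDONLY, 0) != 0)
 *		return (-1);
 *
 *	for (i = 0; i < 2; i++) {
 *		if ((grp = ea_get_object_tree(&ef, 1)) != NULL)
 *			ea_free_object(grp, EUP_ALLOC);
 *	}
 *
 *	while ((grp = ea_get_object_tree(&ef, 1)) != NULL) {
 *		for (obj = grp->eo_group.eg_objs; obj != NULL;
 *		    obj = obj->eo_next) {
 *			if (obj->eo_catalog != CAT_FMA_NVLIST)
 *				continue;
 *			nvl = NULL;
 *			if (nvlist_unpack(obj->eo_item.ei_raw,
 *			    obj->eo_item.ei_size, &nvl, 0) == 0) {
 *				nvlist_print(stdout, nvl);
 *				nvlist_free(nvl);
 *			}
 *		}
 *		ea_free_object(grp, EUP_ALLOC);
 *	}
 *	(void) ea_close(&ef);
 */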
69
70 #include <sys/types.h>
71 #include <sys/mkdev.h>
72 #include <sys/statvfs.h>
73 #include <sys/fm/protocol.h>
74 #include <sys/exacct_impl.h>
75 #include <uuid/uuid.h>
76
77 #include <unistd.h>
78 #include <limits.h>
79 #include <fcntl.h>
80 #include <ctype.h>
81
82 #include <fmd_alloc.h>
83 #include <fmd_error.h>
84 #include <fmd_string.h>
85 #include <fmd_event.h>
86 #include <fmd_conf.h>
87 #include <fmd_subr.h>
88 #include <fmd_case.h>
89 #include <fmd_log.h>
90
91 #include <fmd.h>
92
93 #define CAT_FMA_RGROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA)
94 #define CAT_FMA_GROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA)
95
96 #define CAT_FMA_LABEL (EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL)
97 #define CAT_FMA_VERSION (EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION)
98 #define CAT_FMA_OSREL (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL)
99 #define CAT_FMA_OSVER (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER)
100 #define CAT_FMA_PLAT (EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT)
101 #define CAT_FMA_UUID (EXT_STRING | EXC_DEFAULT | EXD_FMA_UUID)
102 #define CAT_FMA_TODSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC)
103 #define CAT_FMA_TODNSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC)
104 #define CAT_FMA_NVLIST (EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST)
105 #define CAT_FMA_MAJOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR)
106 #define CAT_FMA_MINOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR)
107 #define CAT_FMA_INODE (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE)
108 #define CAT_FMA_OFFSET (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET)
109
110 static ssize_t
111 fmd_log_write(fmd_log_t *lp, const void *buf, size_t n)
112 {
113 ssize_t resid = n;
114 ssize_t len;
115
116 ASSERT(MUTEX_HELD(&lp->log_lock));
117
118 while (resid != 0) {
119 if ((len = write(lp->log_fd, buf, resid)) <= 0)
120 break;
121
122 resid -= len;
123 buf = (char *)buf + len;
124 }
125
126 if (resid == n && n != 0)
127 return (-1);
128
129 return (n - resid);
130 }
131
132 static int
133 fmd_log_write_hdr(fmd_log_t *lp, const char *tag)
134 {
135 ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5, i6;
136 const char *osrel, *osver, *plat;
137 off64_t off = 0;
138 int err = 0;
139 uuid_t uuid;
140
141 (void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel);
142 (void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver);
143 (void) fmd_conf_getprop(fmd.d_conf, "platform", &plat);
144 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &lp->log_uuidlen);
145
146 lp->log_uuid = fmd_zalloc(lp->log_uuidlen + 1, FMD_SLEEP);
147 uuid_generate(uuid);
148 uuid_unparse(uuid, lp->log_uuid);
149
150 err |= ea_set_group(&hdr, CAT_FMA_GROUP);
151 err |= ea_set_group(&toc, CAT_FMA_GROUP);
152
153 err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0);
154 err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0);
155 err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0);
156 err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0);
157 err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0);
158 err |= ea_set_item(&i5, CAT_FMA_UUID, lp->log_uuid, 0);
159 err |= ea_set_item(&i6, CAT_FMA_OFFSET, &off, 0);
160
161 (void) ea_attach_to_group(&hdr, &i0);
162 (void) ea_attach_to_group(&hdr, &i1);
163 (void) ea_attach_to_group(&hdr, &i2);
164 (void) ea_attach_to_group(&hdr, &i3);
165 (void) ea_attach_to_group(&hdr, &i4);
166 (void) ea_attach_to_group(&hdr, &i5);
167 (void) ea_attach_to_group(&toc, &i6);
168
169 if (err == 0) {
170 size_t hdr_size = ea_pack_object(&hdr, NULL, 0);
171 size_t toc_size = ea_pack_object(&toc, NULL, 0);
172
173 size_t size = hdr_size + toc_size;
174 void *buf = fmd_alloc(size, FMD_SLEEP);
175
176 (void) ea_pack_object(&hdr, buf, hdr_size);
177 (void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size);
178
179 if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L)
180 fmd_panic("failed to seek log %s", lp->log_name);
181
182 if (fmd_log_write(lp, buf, size) != size)
183 err = errno; /* save errno for fmd_set_errno() below */
184
185 fmd_free(buf, size);
186
187 lp->log_toc = lp->log_off + hdr_size;
188 lp->log_beg = lp->log_off + hdr_size + toc_size;
189 lp->log_off = lp->log_off + hdr_size + toc_size;
190
191 if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END))
192 fmd_panic("eof off != log_off 0x%llx\n", lp->log_off);
193 } else
194 err = EFMD_LOG_EXACCT;
195
196 (void) ea_free_item(&i0, EUP_ALLOC);
197 (void) ea_free_item(&i1, EUP_ALLOC);
198 (void) ea_free_item(&i2, EUP_ALLOC);
199 (void) ea_free_item(&i3, EUP_ALLOC);
200 (void) ea_free_item(&i4, EUP_ALLOC);
201 (void) ea_free_item(&i5, EUP_ALLOC);
202 (void) ea_free_item(&i6, EUP_ALLOC);
203
204 return (err ? fmd_set_errno(err) : 0);
205 }
206
207 static int
208 fmd_log_check_err(fmd_log_t *lp, int err, const char *msg)
209 {
210 int eaerr = ea_error();
211 char buf[BUFSIZ];
212
213 (void) snprintf(buf, sizeof (buf), "%s: %s: %s\n",
214 lp->log_name, msg, eaerr != EXR_OK ?
215 fmd_ea_strerror(eaerr) : "catalog tag mismatch");
216
217 fmd_error(err, buf);
218 return (fmd_set_errno(err));
219 }
220
221 static int
222 fmd_log_check_hdr(fmd_log_t *lp, const char *tag)
223 {
224 int got_version = 0, got_label = 0;
225 ea_object_t *grp, *obj;
226 off64_t hdr_off, hdr_size;
227 int dvers, fvers;
228 const char *p;
229
230 ea_clear(&lp->log_ea); /* resync exacct file */
231
232 if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L)
233 fmd_panic("failed to seek log %s", lp->log_name);
234
235 /*
236 * Read the first group of log meta-data: the write-once read-only
237 * file header. We read all records in this group, ignoring all but
238 * the VERSION and LABEL, which are required and must be verified.
239 */
240 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
241 grp->eo_catalog != CAT_FMA_GROUP) {
242 ea_free_object(grp, EUP_ALLOC);
243 return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
244 "invalid fma hdr record group"));
245 }
246
247 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
248 switch (obj->eo_catalog) {
249 case CAT_FMA_VERSION:
250 for (dvers = 0, p = fmd.d_version;
251 *p != '\0'; p++) {
252 if (isdigit(*p))
253 dvers = dvers * 10 + (*p - '0');
254 else
255 break;
256 }
257
258 for (fvers = 0, p = obj->eo_item.ei_string;
259 *p != '\0'; p++) {
260 if (isdigit(*p))
261 fvers = fvers * 10 + (*p - '0');
262 else
263 break;
264 }
265
266 if (fvers > dvers) {
267 fmd_error(EFMD_LOG_INVAL, "%s: log version "
268 "%s is not supported by this daemon\n",
269 lp->log_name, obj->eo_item.ei_string);
270 ea_free_object(grp, EUP_ALLOC);
271 return (fmd_set_errno(EFMD_LOG_VERSION));
272 }
273
274 got_version++;
275 break;
276
277 case CAT_FMA_LABEL:
278 if (strcmp(obj->eo_item.ei_string, tag) != 0) {
279 fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' "
280 "does not matched expected tag '%s'\n",
281 lp->log_name, obj->eo_item.ei_string, tag);
282 ea_free_object(grp, EUP_ALLOC);
283 return (fmd_set_errno(EFMD_LOG_INVAL));
284 }
285 got_label++;
286 break;
287 case CAT_FMA_UUID:
288 lp->log_uuid = fmd_strdup(obj->eo_item.ei_string,
289 FMD_SLEEP);
290 lp->log_uuidlen = strlen(lp->log_uuid);
291 break;
292 }
293 }
294
295 hdr_size = ea_pack_object(grp, NULL, 0);
296 ea_free_object(grp, EUP_ALLOC);
297
298 if (!got_version || !got_label) {
299 fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not "
300 "include mandatory version and/or label\n", lp->log_name);
301 return (fmd_set_errno(EFMD_LOG_INVAL));
302 }
303
304 /*
305 * Read the second group of log meta-data: the table of contents. We
306 * expect this group to contain an OFFSET object indicating the current
307 * value of log_skip. We save this in our fmd_log_t and then return.
308 */
309 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
310 grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 ||
311 grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) {
312 ea_free_object(grp, EUP_ALLOC);
313 return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
314 "invalid fma toc record group"));
315 }
316
317 lp->log_toc = hdr_off + hdr_size;
318 lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0);
319 lp->log_off = lseek64(lp->log_fd, 0, SEEK_END);
320 lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64;
321
322 if (lp->log_skip > lp->log_off) {
323 fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; "
324 "resetting to zero\n", lp->log_name, lp->log_skip);
325 lp->log_skip = 0;
326 }
327
328 ea_free_object(grp, EUP_ALLOC);
329 return (0);
330 }
331
332 static int
333 fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags)
334 {
335 int fd = dup(lp->log_fd);
336 const char *creator;
337
338 (void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator);
339
340 if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) {
341 fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n",
342 lp->log_name, fmd_ea_strerror(ea_error()));
343 (void) close(fd);
344 return (fmd_set_errno(EFMD_LOG_EXACCT));
345 }
346
347 lp->log_flags |= FMD_LF_EAOPEN;
348 return (0);
349 }
350
351 static fmd_log_t *
352 fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags)
353 {
354 fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP);
355
356 char buf[PATH_MAX];
357 char *slash = "/";
358 size_t len;
359 int err;
360
361 (void) pthread_mutex_init(&lp->log_lock, NULL);
362 (void) pthread_cond_init(&lp->log_cv, NULL);
363 (void) pthread_mutex_lock(&lp->log_lock);
364
365 if (strcmp(root, "") == 0)
366 slash = "";
367 len = strlen(root) + strlen(name) + strlen(slash) + 1; /* for "\0" */
368 lp->log_name = fmd_alloc(len, FMD_SLEEP);
369 (void) snprintf(lp->log_name, len, "%s%s%s", root, slash, name);
370 lp->log_tag = fmd_strdup(tag, FMD_SLEEP);
371 (void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree);
372
373 if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0)
374 lp->log_flags |= FMD_LF_REPLAY;
375
376 if (strcmp(lp->log_tag, FMD_LOG_XPRT) == 0)
377 oflags &= ~O_SYNC;
378
379 top:
380 if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 ||
381 fstat64(lp->log_fd, &lp->log_stat) == -1) {
382 fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name);
383 fmd_log_close(lp);
384 return (NULL);
385 }
386
387 /*
388 * If our open() created the log file, use libexacct to write a header
389 * and position the file just after the header (EO_TAIL). If the log
390 * file already existed, use libexacct to validate the header and again
391 * position the file just after the header (EO_HEAD). Note that we lie
392 * to libexacct about 'oflags' in order to achieve the desired result.
393 */
394 if (lp->log_stat.st_size == 0) {
395 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL,
396 O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag);
397 } else {
398 err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD,
399 O_RDONLY) || fmd_log_check_hdr(lp, tag);
400 }
401
402 /*
403 * If ea_fdopen() failed and the log was pre-existing, attempt to move
404 * it aside and start a new one. If we created the log but failed to
405 * initialize it, then we have no choice but to give up (e.g. EROFS).
406 */
407 if (err) {
408 fmd_error(EFMD_LOG_OPEN,
409 "failed to initialize log %s", lp->log_name);
410
411 if (lp->log_flags & FMD_LF_EAOPEN) {
412 lp->log_flags &= ~FMD_LF_EAOPEN;
413 (void) ea_close(&lp->log_ea);
414 }
415
416 (void) close(lp->log_fd);
417 lp->log_fd = -1;
418
419 if (lp->log_stat.st_size != 0 && snprintf(buf,
420 sizeof (buf), "%s-", lp->log_name) < PATH_MAX &&
421 rename(lp->log_name, buf) == 0) {
422 TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf));
423 if (oflags & O_CREAT)
424 goto top;
425 }
426
427 fmd_log_close(lp);
428 return (NULL);
429 }
430
431 lp->log_refs++;
432 (void) pthread_mutex_unlock(&lp->log_lock);
433
434 return (lp);
435 }
436
437 fmd_log_t *
438 fmd_log_tryopen(const char *root, const char *name, const char *tag)
439 {
440 return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC));
441 }
442
443 fmd_log_t *
444 fmd_log_open(const char *root, const char *name, const char *tag)
445 {
446 return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC));
447 }
448
449 void
450 fmd_log_close(fmd_log_t *lp)
451 {
452 ASSERT(MUTEX_HELD(&lp->log_lock));
453 ASSERT(lp->log_refs == 0);
454
455 if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) {
456 fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n",
457 lp->log_name, fmd_ea_strerror(ea_error()));
458 }
459
460 if (lp->log_fd >= 0 && close(lp->log_fd) != 0) {
461 fmd_error(EFMD_LOG_CLOSE,
462 "failed to close log %s", lp->log_name);
463 }
464
465 fmd_strfree(lp->log_name);
466 fmd_strfree(lp->log_tag);
467 if (lp->log_uuid != NULL)
468 fmd_free(lp->log_uuid, lp->log_uuidlen + 1);
469
470 fmd_free(lp, sizeof (fmd_log_t));
471 }
472
473 void
474 fmd_log_hold_pending(fmd_log_t *lp)
475 {
476 (void) pthread_mutex_lock(&lp->log_lock);
477
478 lp->log_refs++;
479 ASSERT(lp->log_refs != 0);
480
481 if (lp->log_flags & FMD_LF_REPLAY) {
482 lp->log_pending++;
483 ASSERT(lp->log_pending != 0);
484 }
485
486 (void) pthread_mutex_unlock(&lp->log_lock);
487 }
488
489 void
490 fmd_log_hold(fmd_log_t *lp)
491 {
492 (void) pthread_mutex_lock(&lp->log_lock);
493 lp->log_refs++;
494 ASSERT(lp->log_refs != 0);
495 (void) pthread_mutex_unlock(&lp->log_lock);
496 }
497
498 void
499 fmd_log_rele(fmd_log_t *lp)
500 {
501 (void) pthread_mutex_lock(&lp->log_lock);
502 ASSERT(lp->log_refs != 0);
503
504 if (--lp->log_refs == 0)
505 fmd_log_close(lp);
506 else
507 (void) pthread_mutex_unlock(&lp->log_lock);
508 }
509
510 void
511 fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp)
512 {
513 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
514 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
515 int err = 0;
516
517 ea_object_t grp0, grp1, i0, i1, i2, *items;
518 ea_object_t **fe = NULL;
519 size_t nvsize, easize, itsize, frsize;
520 char *nvbuf, *eabuf;
521 statvfs64_t stv;
522
523 (void) pthread_mutex_lock(&ep->ev_lock);
524
525 ASSERT(ep->ev_flags & FMD_EVF_VOLATILE);
526 ASSERT(ep->ev_log == NULL);
527
528 (void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR);
529 nvbuf = fmd_alloc(nvsize, FMD_SLEEP);
530 (void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
531
532 if (lp->log_flags & FMD_LF_REPLAY)
533 err |= ea_set_group(&grp0, CAT_FMA_RGROUP);
534 else
535 err |= ea_set_group(&grp0, CAT_FMA_GROUP);
536
537 err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0);
538 err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0);
539 err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize);
540
541 if (err != 0) {
542 (void) pthread_mutex_unlock(&ep->ev_lock);
543 err = EFMD_LOG_EXACCT;
544 goto exerr;
545 }
546
547 (void) ea_attach_to_group(&grp0, &i0);
548 (void) ea_attach_to_group(&grp0, &i1);
549 (void) ea_attach_to_group(&grp0, &i2);
550
551 /*
552 * If this event has a case associated with it (i.e. it is a list),
553 * then allocate a block of ea_object_t's and fill in a group for
554 * each event saved in the case's item list. For each such group,
555 * we attach it to grp1, which in turn will be attached to grp0.
556 */
557 if (cp != NULL) {
558 ea_object_t *egrp, *ip, **fp;
559 fmd_event_impl_t *eip;
560 fmd_case_item_t *cit;
561
562 (void) ea_set_group(&grp1, CAT_FMA_GROUP);
563 frsize = sizeof (ea_object_t *) * cip->ci_nitems;
564 itsize = sizeof (ea_object_t) * cip->ci_nitems * 5;
565 items = ip = fmd_alloc(itsize, FMD_SLEEP);
566
567 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
568 major_t maj;
569 minor_t min;
570
571 eip = (fmd_event_impl_t *)cit->cit_event;
572
573 if (eip->ev_log == NULL)
574 continue; /* event was never logged */
575
576 maj = major(eip->ev_log->log_stat.st_dev);
577 min = minor(eip->ev_log->log_stat.st_dev);
578
579 (void) ea_set_group(ip, CAT_FMA_GROUP);
580 egrp = ip++; /* first obj is group */
581
582 /*
583 * If the event log file is in the legacy format, write the
584 * cross-reference (xref) to that file using the legacy
585 * maj/min/inode tuple; otherwise write it using the log
586 * file's uuid.
587 */
588 if (eip->ev_log->log_uuid == NULL) {
589 (void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0);
590 (void) ea_attach_to_group(egrp, ip++);
591 (void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0);
592 (void) ea_attach_to_group(egrp, ip++);
593 (void) ea_set_item(ip, CAT_FMA_INODE,
594 &eip->ev_log->log_stat.st_ino, 0);
595 (void) ea_attach_to_group(egrp, ip++);
596 } else {
597 if (ea_set_item(ip, CAT_FMA_UUID,
598 eip->ev_log->log_uuid, 0) == -1) {
599 err = EFMD_LOG_EXACCT;
600 goto exerrcp;
601 }
602 if (fe == NULL)
603 fe = fp = fmd_zalloc(frsize, FMD_SLEEP);
604 *fp++ = ip;
605 (void) ea_attach_to_group(egrp, ip++);
606 }
607 (void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0);
608 (void) ea_attach_to_group(egrp, ip++);
609 (void) ea_attach_to_group(&grp1, egrp);
610 }
611 (void) ea_attach_to_group(&grp0, &grp1);
612 }
613
614 easize = ea_pack_object(&grp0, NULL, 0);
615 eabuf = fmd_alloc(easize, FMD_SLEEP);
616 (void) ea_pack_object(&grp0, eabuf, easize);
617
618 /*
619 * Before writing the record, check to see if this would cause the free
620 * space in the filesystem to drop below our minfree threshold. If so,
621 * don't bother attempting the write and instead pretend it failed. As
622 * fmd(1M) runs as root, it will be able to access the space "reserved"
623 * for root, and therefore can run the system out of disk space in a
624 * heavy error load situation, violating the basic design principle of
625 * fmd(1M) that we don't want to make a bad situation even worse.
626 */
627 (void) pthread_mutex_lock(&lp->log_lock);
628
629 if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 &&
630 stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) {
631
632 TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag));
633 err = EFMD_LOG_MINFREE;
634
635 } else if (fmd_log_write(lp, eabuf, easize) == easize) {
636 TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx",
637 lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off));
638
639 ep->ev_flags &= ~FMD_EVF_VOLATILE;
640 ep->ev_log = lp;
641 ep->ev_off = lp->log_off;
642 ep->ev_len = easize;
643
644 if (lp->log_flags & FMD_LF_REPLAY) {
645 lp->log_pending++;
646 ASSERT(lp->log_pending != 0);
647 }
648
649 lp->log_refs++;
650 ASSERT(lp->log_refs != 0);
651 lp->log_off += easize;
652 } else {
653 err = errno; /* save errno for fmd_error() call below */
654
655 /*
656 * If we can't append the record, seek the file back to
657 * the original location and truncate it there in order to make
658 * sure the file is always in a sane state w.r.t. libexacct.
659 */
660 (void) lseek64(lp->log_fd, lp->log_off, SEEK_SET);
661 (void) ftruncate64(lp->log_fd, lp->log_off);
662 }
663
664 (void) pthread_mutex_unlock(&lp->log_lock);
665 (void) pthread_mutex_unlock(&ep->ev_lock);
666
667 fmd_free(eabuf, easize);
668
669 exerrcp:
670 if (cp != NULL) {
671 if (fe != NULL) {
672 ea_object_t **fp = fe;
673 int i = 0;
674
675 for (; *fp != NULL && i < cip->ci_nitems; i++)
676 (void) ea_free_item(*fp++, EUP_ALLOC);
677 fmd_free(fe, frsize);
678 }
679
680 fmd_free(items, itsize);
681 }
682
683 exerr:
684 fmd_free(nvbuf, nvsize);
685
686 (void) ea_free_item(&i0, EUP_ALLOC);
687 (void) ea_free_item(&i1, EUP_ALLOC);
688 (void) ea_free_item(&i2, EUP_ALLOC);
689
690 /*
691 * Keep track of out-of-space errors using global statistics. As we're
692 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged.
693 */
694 if (err == ENOSPC || err == EFMD_LOG_MINFREE) {
695 fmd_stat_t *sp;
696
697 if (lp == fmd.d_errlog)
698 sp = &fmd.d_stats->ds_err_enospc;
699 else if (lp == fmd.d_fltlog)
700 sp = &fmd.d_stats->ds_flt_enospc;
701 else
702 sp = &fmd.d_stats->ds_oth_enospc;
703
704 (void) pthread_mutex_lock(&fmd.d_stats_lock);
705 sp->fmds_value.ui64++;
706 (void) pthread_mutex_unlock(&fmd.d_stats_lock);
707 }
708
709 if (err != 0) {
710 fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n",
711 lp->log_tag, (void *)ep, fmd_strerror(err));
712 }
713 }
714
715 /*
716 * Commit an event to the log permanently, indicating that it should not be
717 * replayed on restart. This is done by overwriting the event group's catalog
718 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()). We
719 * use pwrite64() to update the existing word directly, using somewhat guilty
720 * knowledge that exacct stores the 32-bit catalog word first for each object.
721 * Since we are overwriting an existing log location using pwrite64() and hold
722 * the event lock, we do not need to hold the log_lock during the i/o.
723 */
724 void
725 fmd_log_commit(fmd_log_t *lp, fmd_event_t *e)
726 {
727 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
728 ea_catalog_t c;
730
731 if (!(lp->log_flags & FMD_LF_REPLAY))
732 return; /* log does not require replay tagging */
733
734 ASSERT(MUTEX_HELD(&ep->ev_lock));
735 ASSERT(ep->ev_log == lp && ep->ev_off != 0);
736
737 c = CAT_FMA_GROUP;
738 exacct_order32(&c);
739
740 if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) {
741 TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep));
742 ep->ev_flags &= ~FMD_EVF_REPLAY;
743
744 /*
745 * If we have committed the event, check to see if the TOC skip
746 * offset needs to be updated, and decrement the pending count.
747 */
748 (void) pthread_mutex_lock(&lp->log_lock);
749
750 if (lp->log_skip == ep->ev_off) {
751 lp->log_flags |= FMD_LF_DIRTY;
752 lp->log_skip += ep->ev_len;
753 }
754
755 ASSERT(lp->log_pending != 0);
756 lp->log_pending--;
757
758 (void) pthread_cond_broadcast(&lp->log_cv);
759 (void) pthread_mutex_unlock(&lp->log_lock);
760
761 } else {
762 fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n",
763 lp->log_tag, (void *)ep, fmd_strerror(errno));
764 }
765 }
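
/*
 * For illustration only (not existing fmd code): because a packed group
 * record begins with its 32-bit catalog word (the "guilty knowledge"
 * exploited by fmd_log_commit() above), a hypothetical consistency checker
 * could classify the record at a given offset as committed or still pending
 * replay by reading back just that word.  This sketch assumes that
 * exacct_order32() converts between host and file byte order in both
 * directions, which matches how fmd_log_commit() uses it:
 *
 *	static int
 *	log_record_is_pending(int fd, off64_t off)
 *	{
 *		ea_catalog_t c;
 *
 *		if (pread64(fd, &c, sizeof (c), off) != sizeof (c))
 *			return (-1);
 *		exacct_order32(&c);
 *		return (c == CAT_FMA_RGROUP);
 *	}
 */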
766
767 /*
768 * If we need to destroy an event and it wasn't able to be committed, we permit
769 * the owner to decommit from ever trying again. This operation decrements the
770 * pending count on the log and broadcasts to anyone waiting on log_cv.
771 */
772 void
773 fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e)
774 {
775 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
776
777 if (!(lp->log_flags & FMD_LF_REPLAY))
778 return; /* log does not require replay tagging */
779
780 ASSERT(MUTEX_HELD(&ep->ev_lock));
781 ASSERT(ep->ev_log == lp);
782
783 (void) pthread_mutex_lock(&lp->log_lock);
784
785 TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep));
786 ep->ev_flags &= ~FMD_EVF_REPLAY;
787
788 ASSERT(lp->log_pending != 0);
789 lp->log_pending--;
790
791 (void) pthread_cond_broadcast(&lp->log_cv);
792 (void) pthread_mutex_unlock(&lp->log_lock);
793 }
794
795 static fmd_event_t *
796 fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off)
797 {
798 fmd_timeval_t ftv = { -1ULL, -1ULL };
799 nvlist_t *nvl = NULL;
800
801 ea_object_t *obj;
802 char *class;
803 int err;
804
805 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
806 switch (obj->eo_catalog) {
807 case CAT_FMA_NVLIST:
808 if ((err = nvlist_xunpack(obj->eo_item.ei_raw,
809 obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) {
810 fmd_error(EFMD_LOG_UNPACK, "failed to unpack "
811 "log nvpair: %s\n", fmd_strerror(err));
812 return (NULL);
813 }
814 break;
815
816 case CAT_FMA_TODSEC:
817 ftv.ftv_sec = obj->eo_item.ei_uint64;
818 break;
819
820 case CAT_FMA_TODNSEC:
821 ftv.ftv_nsec = obj->eo_item.ei_uint64;
822 break;
823 }
824 }
825
826 if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) {
827 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
828 "required object(s) missing from record group\n");
829 nvlist_free(nvl);
830 return (NULL);
831 }
832
833 if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
834 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
835 "record is missing required '%s' nvpair\n", FM_CLASS);
836 nvlist_free(nvl);
837 return (NULL);
838 }
839
840 return (fmd_event_recreate(FMD_EVT_PROTOCOL,
841 &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0)));
842 }
843
844 /*
845 * Replay event(s) from the specified log by invoking the specified callback
846 * function 'func' for each event. If the log has the FMD_LF_REPLAY flag set,
847 * we replay all events after log_skip that have the FMA_RGROUP group tag.
848 * This mode is used for the error telemetry log. If the log does not have
849 * this flag set (used for ASRU logs), only the most recent event is replayed.
850 */
851 void
852 fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data)
853 {
854 ea_object_t obj, *grp;
855 ea_object_type_t type;
856 ea_catalog_t c;
857 fmd_event_t *ep;
858 off64_t off, skp;
859 uint_t n = 0;
860
861 (void) pthread_mutex_lock(&lp->log_lock);
862
863 if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) {
864 (void) pthread_mutex_unlock(&lp->log_lock);
865 return; /* we just created this log: never replay events */
866 }
867
868 while (lp->log_flags & FMD_LF_BUSY)
869 (void) pthread_cond_wait(&lp->log_cv, &lp->log_lock);
870
871 if (lp->log_off == lp->log_beg) {
872 (void) pthread_mutex_unlock(&lp->log_lock);
873 return; /* no records appended yet */
874 }
875
876 lp->log_flags |= FMD_LF_BUSY;
877 skp = lp->log_skip;
878 ea_clear(&lp->log_ea); /* resync exacct file */
879
880 /*
881 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it
882 * is non-zero) or at log_beg. Otherwise replay from the end (log_off).
883 */
884 if (lp->log_flags & FMD_LF_REPLAY) {
885 off = MAX(lp->log_beg, lp->log_skip);
886 c = CAT_FMA_RGROUP;
887 } else {
888 off = lp->log_off;
889 c = CAT_FMA_GROUP;
890 }
891
892 if (lseek64(lp->log_fd, off, SEEK_SET) != off) {
893 fmd_panic("failed to seek %s to 0x%llx\n",
894 lp->log_name, (u_longlong_t)off);
895 }
896
897 /*
898 * If FMD_LF_REPLAY is not set, back up to the start of the previous
899 * object and make sure this object is an EO_GROUP; otherwise return.
900 */
901 if (!(lp->log_flags & FMD_LF_REPLAY) &&
902 (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) {
903 fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected "
904 "type %d (log may be truncated or corrupt)\n", type);
905 goto out;
906 }
907
908 while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) {
909 if (!(lp->log_flags & FMD_LF_REPLAY))
910 off -= ea_pack_object(grp, NULL, 0);
911 else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP)
912 skp = off; /* update skip */
913
914 /*
915 * We temporarily drop log_lock around the call to unpack the
916 * event, hold it, and perform the callback, because these
917 * operations may try to acquire log_lock to bump log_refs.
918 * We cannot lose control because the FMD_LF_BUSY flag is set.
919 */
920 (void) pthread_mutex_unlock(&lp->log_lock);
921
922 if (grp->eo_catalog == c &&
923 (ep = fmd_log_unpack(lp, grp, off)) != NULL) {
924
925 TRACE((FMD_DBG_LOG, "replay %s %p off %llx",
926 lp->log_tag, (void *)ep, (u_longlong_t)off));
927
928 fmd_event_hold(ep);
929 func(lp, ep, data);
930 fmd_event_rele(ep);
931 n++;
932 }
933
934 (void) pthread_mutex_lock(&lp->log_lock);
935 off += ea_pack_object(grp, NULL, 0);
936 ea_free_object(grp, EUP_ALLOC);
937 }
938
939 if (ea_error() != EXR_EOF) {
940 fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at "
941 "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off,
942 fmd_ea_strerror(ea_error()));
943 }
944
945 if (n == 0)
946 skp = off; /* if no replays, move skip to where we ended up */
947
948 out:
949 if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) {
950 fmd_panic("failed to seek %s to 0x%llx\n",
951 lp->log_name, (u_longlong_t)lp->log_off);
952 }
953
954 if (skp != lp->log_skip) {
955 lp->log_flags |= FMD_LF_DIRTY;
956 lp->log_skip = skp;
957 }
958
959 lp->log_flags &= ~FMD_LF_BUSY;
960 (void) pthread_cond_broadcast(&lp->log_cv);
961 (void) pthread_mutex_unlock(&lp->log_lock);
962 }
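
/*
 * A minimal usage sketch for fmd_log_replay() (for illustration; the real
 * callers live elsewhere in fmd).  It assumes fmd_log_f has the signature
 * implied by the call 'func(lp, ep, data)' above, namely
 * void (fmd_log_t *, fmd_event_t *, void *); the callback here does nothing
 * but count the events that would be replayed:
 *
 *	static void
 *	count_events(fmd_log_t *lp, fmd_event_t *ep, void *arg)
 *	{
 *		(*(uint_t *)arg)++;
 *	}
 *
 *	uint_t n = 0;
 *	fmd_log_replay(lp, count_events, &n);
 */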
963
964 void
965 fmd_log_update(fmd_log_t *lp)
966 {
967 ea_object_t toc, item;
968 off64_t skip = 0;
969 size_t size;
970 void *buf;
971
972 (void) pthread_mutex_lock(&lp->log_lock);
973
974 if (lp->log_flags & FMD_LF_DIRTY) {
975 lp->log_flags &= ~FMD_LF_DIRTY;
976 skip = lp->log_skip;
977 }
978
979 (void) pthread_mutex_unlock(&lp->log_lock);
980
981 /*
982 * If the skip needs to be updated, construct a TOC record group
983 * containing the skip offset and overwrite the TOC in-place.
984 */
985 if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 &&
986 ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) {
987
988 (void) ea_attach_to_group(&toc, &item);
989 size = ea_pack_object(&toc, NULL, 0);
990 buf = fmd_alloc(size, FMD_SLEEP);
991
992 (void) ea_pack_object(&toc, buf, size);
993 ASSERT(lp->log_toc + size == lp->log_beg);
994
995 if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) {
996 TRACE((FMD_DBG_LOG, "updated skip to %llx", skip));
997 } else {
998 fmd_error(EFMD_LOG_UPDATE,
999 "failed to log_update %s", lp->log_tag);
1000 }
1001
1002 fmd_free(buf, size);
1003 (void) ea_free_item(&item, EUP_ALLOC);
1004 }
1005 }
1006
1007 /*
1008 * Rotate the specified log by renaming its underlying file to a staging file
1009 * that can be handed off to logadm(1M) or an administrator script. If the
1010 * rename succeeds, open a new log file using the old path and return it.
1011 * Note that we are relying on our caller to use some higher-level mechanism to
1012 * ensure that fmd_log_rotate() cannot be called while other threads are
1013 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used
1014 * for the global errlog and fltlog).
1015 */
1016 fmd_log_t *
1017 fmd_log_rotate(fmd_log_t *lp)
1018 {
1019 char npath[PATH_MAX];
1020 fmd_log_t *nlp;
1021
1022 (void) snprintf(npath, sizeof (npath), "%s+", lp->log_name);
1023
1024 /*
1025 * Open new log file.
1026 */
1027 if ((nlp = fmd_log_open("", npath, lp->log_tag)) == NULL) {
1028 fmd_error(EFMD_LOG_ROTATE, "failed to open %s", npath);
1029 (void) fmd_set_errno(EFMD_LOG_ROTATE);
1030 return (NULL);
1031 }
1032
1033 (void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name);
1034 (void) pthread_mutex_lock(&lp->log_lock);
1035
1036 /*
1037 * Check for any pending commits to drain before proceeding. We can't
1038 * rotate the log out if commits are pending because if we die after
1039 * the log is moved aside, we won't be able to replay them on restart.
1040 */
1041 if (lp->log_pending != 0) {
1042 (void) pthread_mutex_unlock(&lp->log_lock);
1043 (void) unlink(nlp->log_name);
1044 fmd_log_rele(nlp);
1045 (void) fmd_set_errno(EFMD_LOG_ROTBUSY);
1046 return (NULL);
1047 }
1048
1049 if (rename(lp->log_name, npath) != 0) {
1050 (void) pthread_mutex_unlock(&lp->log_lock);
1051 fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name);
1052 (void) unlink(nlp->log_name);
1053 fmd_log_rele(nlp);
1054 (void) fmd_set_errno(EFMD_LOG_ROTATE);
1055 return (NULL);
1056 }
1057
1058 if (rename(nlp->log_name, lp->log_name) != 0) {
1059 (void) pthread_mutex_unlock(&lp->log_lock);
1060 fmd_error(EFMD_LOG_ROTATE, "failed to rename %s",
1061 nlp->log_name);
1062 (void) unlink(nlp->log_name);
1063 fmd_log_rele(nlp);
1064 (void) fmd_set_errno(EFMD_LOG_ROTATE);
1065 return (NULL);
1066 }
1067
1068 /*
1069 * Change name of new log file
1070 */
1071 fmd_strfree(nlp->log_name);
1072 nlp->log_name = fmd_strdup(lp->log_name, FMD_SLEEP);
1073
1074 /*
1075 * If we've rotated the log, no pending events exist so we don't have
1076 * any more commits coming, and our caller should have arranged for
1077 * no more calls to append. As such, we can close log_fd for good.
1078 */
1079 if (lp->log_flags & FMD_LF_EAOPEN) {
1080 (void) ea_close(&lp->log_ea);
1081 lp->log_flags &= ~FMD_LF_EAOPEN;
1082 }
1083
1084 (void) close(lp->log_fd);
1085 lp->log_fd = -1;
1086
1087 (void) pthread_mutex_unlock(&lp->log_lock);
1088 return (nlp);
1089 }
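
/*
 * For illustration only (not fmd code): a caller that owns the higher-level
 * lock described above might swap out the global error log roughly as
 * follows.  This sketch assumes fmd.d_log_lock is the reader-writer lock
 * mentioned in the block comment before fmd_log_rotate(), and that
 * fmd.d_errlog is the daemon's error-log handle (as referenced in
 * fmd_log_append() above):
 *
 *	fmd_log_t *olp, *nlp;
 *
 *	(void) pthread_rwlock_wrlock(&fmd.d_log_lock);
 *	olp = fmd.d_errlog;
 *	if ((nlp = fmd_log_rotate(olp)) != NULL) {
 *		fmd.d_errlog = nlp;
 *		fmd_log_rele(olp);
 *	}
 *	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
 */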
1090