1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * FMD Log File Subsystem
29 *
30 * Events are written to one of two log files as they are received or created;
31 * the error log tracks all ereport.* events received on the inbound event
32 * transport, and the fault log tracks all list.* events generated by fmd or
33 * its client modules. In addition, we use the same log file format to cache
34 * state and events associated with ASRUs that are named in a diagnosis.
35 *
36 * The log files use the exacct format manipulated by libexacct(3LIB) and
37 * originally defined in PSARC 1999/119. However, the exacct library was
38 * designed primarily for read-only clients and without the synchronous i/o
39 * considerations and seeking required for fmd, so we use libexacct here only
40 * to read and write the file headers and to pack data from memory into a file
41 * bytestream. All of the i/o and file offset manipulations are performed by
42 * the fmd code below. Our exacct file management uses the following grammar:
43 *
44 * file := hdr toc event*
45 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER
46 * EXD_FMA_PLAT EXD_FMA_UUID
47 * toc := EXD_FMA_OFFSET
48 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* or legacy evref
49 * evref := EXD_FMA_UUID EXD_FMA_OFFSET
50 * legacy evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET
51 *
52 * Any event can be uniquely identified by the tuple (file, offset) where file
53 * is encoded as (uuid) when we are cross-linking files. For legacy file
54 * formats we still support encoding the reference as (major, minor, inode).
55 * Note that we break out of the file's dev_t into its two 32-bit components to
56 * permit development of either 32-bit or 64-bit log readers and writers; the
57 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way
58 * for a 32-bit application to retrieve and store a 64-bit dev_t.
59 *
60 * In order to replay events in the event of an fmd crash, events are initially
61 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the
62 * fmd_log_append() function. Later, once an event transitions from the
63 * received state to one of its other states (see fmd_event.c for details),
64 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating
65 * that the event is fully processed and no longer needs to be replayed.
66 */
67
68 #include <sys/types.h>
69 #include <sys/mkdev.h>
70 #include <sys/statvfs.h>
71 #include <sys/fm/protocol.h>
72 #include <sys/exacct_impl.h>
73 #include <uuid/uuid.h>
74
75 #include <unistd.h>
76 #include <limits.h>
77 #include <fcntl.h>
78 #include <ctype.h>
79
80 #include <fmd_alloc.h>
81 #include <fmd_error.h>
82 #include <fmd_string.h>
83 #include <fmd_event.h>
84 #include <fmd_conf.h>
85 #include <fmd_subr.h>
86 #include <fmd_case.h>
87 #include <fmd_log.h>
88
89 #include <fmd.h>
90
91 #define CAT_FMA_RGROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA)
92 #define CAT_FMA_GROUP (EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA)
93
94 #define CAT_FMA_LABEL (EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL)
95 #define CAT_FMA_VERSION (EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION)
96 #define CAT_FMA_OSREL (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL)
97 #define CAT_FMA_OSVER (EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER)
98 #define CAT_FMA_PLAT (EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT)
99 #define CAT_FMA_UUID (EXT_STRING | EXC_DEFAULT | EXD_FMA_UUID)
100 #define CAT_FMA_TODSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC)
101 #define CAT_FMA_TODNSEC (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC)
102 #define CAT_FMA_NVLIST (EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST)
103 #define CAT_FMA_MAJOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR)
104 #define CAT_FMA_MINOR (EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR)
105 #define CAT_FMA_INODE (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE)
106 #define CAT_FMA_OFFSET (EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET)
107
108 static ssize_t
fmd_log_write(fmd_log_t * lp,const void * buf,size_t n)109 fmd_log_write(fmd_log_t *lp, const void *buf, size_t n)
110 {
111 ssize_t resid = n;
112 ssize_t len;
113
114 ASSERT(MUTEX_HELD(&lp->log_lock));
115
116 while (resid != 0) {
117 if ((len = write(lp->log_fd, buf, resid)) <= 0)
118 break;
119
120 resid -= len;
121 buf = (char *)buf + len;
122 }
123
124 if (resid == n && n != 0)
125 return (-1);
126
127 return (n - resid);
128 }
129
/*
 * Write the meta-data that begins every newly-created log file: the
 * write-once header group (label, daemon version, OS release and version,
 * platform, and a freshly generated uuid naming this file) followed by the
 * one-item TOC group whose OFFSET item holds the replay skip, initially 0.
 * On success, log_toc, log_beg, and log_off are set to the corresponding
 * file offsets.  Returns 0 on success or -1 with errno set on failure.
 */
static int
fmd_log_write_hdr(fmd_log_t *lp, const char *tag)
{
	ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5, i6;
	const char *osrel, *osver, *plat;
	off64_t off = 0;
	int err = 0;
	uuid_t uuid;

	/* Fetch the identity properties recorded in every log header. */
	(void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel);
	(void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver);
	(void) fmd_conf_getprop(fmd.d_conf, "platform", &plat);
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &lp->log_uuidlen);

	/* Generate a uuid so that other logs can cross-reference this file. */
	lp->log_uuid = fmd_zalloc(lp->log_uuidlen + 1, FMD_SLEEP);
	uuid_generate(uuid);
	uuid_unparse(uuid, lp->log_uuid);

	/* OR the results together; any failure is reported as one error. */
	err |= ea_set_group(&hdr, CAT_FMA_GROUP);
	err |= ea_set_group(&toc, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0);
	err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0);
	err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0);
	err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0);
	err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0);
	err |= ea_set_item(&i5, CAT_FMA_UUID, lp->log_uuid, 0);
	err |= ea_set_item(&i6, CAT_FMA_OFFSET, &off, 0);

	(void) ea_attach_to_group(&hdr, &i0);
	(void) ea_attach_to_group(&hdr, &i1);
	(void) ea_attach_to_group(&hdr, &i2);
	(void) ea_attach_to_group(&hdr, &i3);
	(void) ea_attach_to_group(&hdr, &i4);
	(void) ea_attach_to_group(&hdr, &i5);
	(void) ea_attach_to_group(&toc, &i6);

	if (err == 0) {
		/* Pack header and TOC into one buffer and append at EOF. */
		size_t hdr_size = ea_pack_object(&hdr, NULL, 0);
		size_t toc_size = ea_pack_object(&toc, NULL, 0);

		size_t size = hdr_size + toc_size;
		void *buf = fmd_alloc(size, FMD_SLEEP);

		(void) ea_pack_object(&hdr, buf, hdr_size);
		(void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size);

		if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L)
			fmd_panic("failed to seek log %s", lp->log_name);

		if (fmd_log_write(lp, buf, size) != size)
			err = errno; /* save errno for fmd_set_errno() below */

		fmd_free(buf, size);

		/* Record where the TOC and first event record will live. */
		lp->log_toc = lp->log_off + hdr_size;
		lp->log_beg = lp->log_off + hdr_size + toc_size;
		lp->log_off = lp->log_off + hdr_size + toc_size;

		if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END))
			fmd_panic("eof off != log_off 0x%llx\n", lp->log_off);
	} else
		err = EFMD_LOG_EXACCT;

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);
	(void) ea_free_item(&i3, EUP_ALLOC);
	(void) ea_free_item(&i4, EUP_ALLOC);
	(void) ea_free_item(&i5, EUP_ALLOC);
	(void) ea_free_item(&i6, EUP_ALLOC);

	return (err ? fmd_set_errno(err) : 0);
}
204
205 static int
fmd_log_check_err(fmd_log_t * lp,int err,const char * msg)206 fmd_log_check_err(fmd_log_t *lp, int err, const char *msg)
207 {
208 int eaerr = ea_error();
209 char buf[BUFSIZ];
210
211 (void) snprintf(buf, sizeof (buf), "%s: %s: %s\n",
212 lp->log_name, msg, eaerr != EXR_OK ?
213 fmd_ea_strerror(eaerr) : "catalog tag mismatch");
214
215 fmd_error(err, buf);
216 return (fmd_set_errno(err));
217 }
218
219 static int
fmd_log_check_hdr(fmd_log_t * lp,const char * tag)220 fmd_log_check_hdr(fmd_log_t *lp, const char *tag)
221 {
222 int got_version = 0, got_label = 0;
223 ea_object_t *grp, *obj;
224 off64_t hdr_off, hdr_size;
225 int dvers, fvers;
226 const char *p;
227
228 ea_clear(&lp->log_ea); /* resync exacct file */
229
230 if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L)
231 fmd_panic("failed to seek log %s", lp->log_name);
232
233 /*
234 * Read the first group of log meta-data: the write-once read-only
235 * file header. We read all records in this group, ignoring all but
236 * the VERSION and LABEL, which are required and must be verified.
237 */
238 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
239 grp->eo_catalog != CAT_FMA_GROUP) {
240 ea_free_object(grp, EUP_ALLOC);
241 return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
242 "invalid fma hdr record group"));
243 }
244
245 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
246 switch (obj->eo_catalog) {
247 case CAT_FMA_VERSION:
248 for (dvers = 0, p = fmd.d_version;
249 *p != '\0'; p++) {
250 if (isdigit(*p))
251 dvers = dvers * 10 + (*p - '0');
252 else
253 break;
254 }
255
256 for (fvers = 0, p = obj->eo_item.ei_string;
257 *p != '\0'; p++) {
258 if (isdigit(*p))
259 fvers = fvers * 10 + (*p - '0');
260 else
261 break;
262 }
263
264 if (fvers > dvers) {
265 fmd_error(EFMD_LOG_INVAL, "%s: log version "
266 "%s is not supported by this daemon\n",
267 lp->log_name, obj->eo_item.ei_string);
268 ea_free_object(grp, EUP_ALLOC);
269 return (fmd_set_errno(EFMD_LOG_VERSION));
270 }
271
272 got_version++;
273 break;
274
275 case CAT_FMA_LABEL:
276 if (strcmp(obj->eo_item.ei_string, tag) != 0) {
277 fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' "
278 "does not matched expected tag '%s'\n",
279 lp->log_name, obj->eo_item.ei_string, tag);
280 ea_free_object(grp, EUP_ALLOC);
281 return (fmd_set_errno(EFMD_LOG_INVAL));
282 }
283 got_label++;
284 break;
285 case CAT_FMA_UUID:
286 lp->log_uuid = fmd_strdup(obj->eo_item.ei_string,
287 FMD_SLEEP);
288 lp->log_uuidlen = strlen(lp->log_uuid);
289 break;
290 }
291 }
292
293 hdr_size = ea_pack_object(grp, NULL, 0);
294 ea_free_object(grp, EUP_ALLOC);
295
296 if (!got_version || !got_label) {
297 fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not "
298 "include mandatory version and/or label\n", lp->log_name);
299 return (fmd_set_errno(EFMD_LOG_INVAL));
300 }
301
302 /*
303 * Read the second group of log meta-data: the table of contents. We
304 * expect this group to contain an OFFSET object indicating the current
305 * value of log_skip. We save this in our fmd_log_t and then return.
306 */
307 if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
308 grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 ||
309 grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) {
310 ea_free_object(grp, EUP_ALLOC);
311 return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
312 "invalid fma toc record group"));
313 }
314
315 lp->log_toc = hdr_off + hdr_size;
316 lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0);
317 lp->log_off = lseek64(lp->log_fd, 0, SEEK_END);
318 lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64;
319
320 if (lp->log_skip > lp->log_off) {
321 fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; "
322 "resetting to zero\n", lp->log_name, lp->log_skip);
323 lp->log_skip = 0;
324 }
325
326 ea_free_object(grp, EUP_ALLOC);
327 return (0);
328 }
329
330 static int
fmd_log_open_exacct(fmd_log_t * lp,int aflags,int oflags)331 fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags)
332 {
333 int fd = dup(lp->log_fd);
334 const char *creator;
335
336 (void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator);
337
338 if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) {
339 fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n",
340 lp->log_name, fmd_ea_strerror(ea_error()));
341 (void) close(fd);
342 return (fmd_set_errno(EFMD_LOG_EXACCT));
343 }
344
345 lp->log_flags |= FMD_LF_EAOPEN;
346 return (0);
347 }
348
/*
 * Common open path for fmd log files.  Allocate and initialize a new
 * fmd_log_t, build its pathname from 'root' and 'name', open (and possibly
 * create) the underlying file, and then either write a fresh header or
 * validate the existing one.  A pre-existing log that fails validation is
 * renamed aside with a trailing '-' and, if O_CREAT was specified, the
 * open is retried from scratch.  Returns the new log holding one reference
 * with its lock dropped, or NULL on failure.
 */
static fmd_log_t *
fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags)
{
	fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP);

	char buf[PATH_MAX];
	char *slash = "/";
	size_t len;
	int err;

	(void) pthread_mutex_init(&lp->log_lock, NULL);
	(void) pthread_cond_init(&lp->log_cv, NULL);
	(void) pthread_mutex_lock(&lp->log_lock);

	/* Avoid a leading separator when no root directory was given. */
	if (strcmp(root, "") == 0)
		slash = "";
	len = strlen(root) + strlen(name) + strlen(slash) + 1; /* for "\0" */
	lp->log_name = fmd_alloc(len, FMD_SLEEP);
	(void) snprintf(lp->log_name, len, "%s%s%s", root, slash, name);
	lp->log_tag = fmd_strdup(tag, FMD_SLEEP);
	(void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree);

	/* Only the error log participates in the replay protocol. */
	if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0)
		lp->log_flags |= FMD_LF_REPLAY;

	/* Transport logs do not require synchronous i/o. */
	if (strcmp(lp->log_tag, FMD_LOG_XPRT) == 0)
		oflags &= ~O_SYNC;

top:
	/*
	 * Note: fmd_log_close() expects log_lock held and log_refs == 0,
	 * which is the state here on all of the error returns below.
	 */
	if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 ||
	    fstat64(lp->log_fd, &lp->log_stat) == -1) {
		fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name);
		fmd_log_close(lp);
		return (NULL);
	}

	/*
	 * If our open() created the log file, use libexacct to write a header
	 * and position the file just after the header (EO_TAIL).  If the log
	 * file already existed, use libexacct to validate the header and again
	 * position the file just after the header (EO_HEAD).  Note that we lie
	 * to libexacct about 'oflags' in order to achieve the desired result.
	 */
	if (lp->log_stat.st_size == 0) {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL,
		    O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag);
	} else {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD,
		    O_RDONLY) || fmd_log_check_hdr(lp, tag);
	}

	/*
	 * If ea_fdopen() failed and the log was pre-existing, attempt to move
	 * it aside and start a new one.  If we created the log but failed to
	 * initialize it, then we have no choice but to give up (e.g. EROFS).
	 */
	if (err) {
		fmd_error(EFMD_LOG_OPEN,
		    "failed to initialize log %s", lp->log_name);

		if (lp->log_flags & FMD_LF_EAOPEN) {
			lp->log_flags &= ~FMD_LF_EAOPEN;
			(void) ea_close(&lp->log_ea);
		}

		(void) close(lp->log_fd);
		lp->log_fd = -1;

		/* Rename the bad log to "<name>-" and retry if permitted. */
		if (lp->log_stat.st_size != 0 && snprintf(buf,
		    sizeof (buf), "%s-", lp->log_name) < PATH_MAX &&
		    rename(lp->log_name, buf) == 0) {
			TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf));
			if (oflags & O_CREAT)
				goto top;
		}

		fmd_log_close(lp);
		return (NULL);
	}

	lp->log_refs++;
	(void) pthread_mutex_unlock(&lp->log_lock);

	return (lp);
}
434
435 fmd_log_t *
fmd_log_tryopen(const char * root,const char * name,const char * tag)436 fmd_log_tryopen(const char *root, const char *name, const char *tag)
437 {
438 return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC));
439 }
440
441 fmd_log_t *
fmd_log_open(const char * root,const char * name,const char * tag)442 fmd_log_open(const char *root, const char *name, const char *tag)
443 {
444 return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC));
445 }
446
447 void
fmd_log_close(fmd_log_t * lp)448 fmd_log_close(fmd_log_t *lp)
449 {
450 ASSERT(MUTEX_HELD(&lp->log_lock));
451 ASSERT(lp->log_refs == 0);
452
453 if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) {
454 fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n",
455 lp->log_name, fmd_ea_strerror(ea_error()));
456 }
457
458 if (lp->log_fd >= 0 && close(lp->log_fd) != 0) {
459 fmd_error(EFMD_LOG_CLOSE,
460 "failed to close log %s", lp->log_name);
461 }
462
463 fmd_strfree(lp->log_name);
464 fmd_strfree(lp->log_tag);
465 if (lp->log_uuid != NULL)
466 fmd_free(lp->log_uuid, lp->log_uuidlen + 1);
467
468 fmd_free(lp, sizeof (fmd_log_t));
469 }
470
/*
 * Take a reference on the log on behalf of an event that is expected to be
 * committed (or decommitted) later.  For logs that use the replay protocol
 * (FMD_LF_REPLAY), the pending count is also bumped; fmd_log_rotate()
 * refuses to rotate while any commits are pending.
 */
void
fmd_log_hold_pending(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);

	lp->log_refs++;
	ASSERT(lp->log_refs != 0);	/* detect reference count wraparound */

	if (lp->log_flags & FMD_LF_REPLAY) {
		lp->log_pending++;
		ASSERT(lp->log_pending != 0);	/* detect counter wraparound */
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
}
486
/*
 * Take a reference on the log, preventing it from being torn down by
 * fmd_log_close() until a matching fmd_log_rele() is performed.
 */
void
fmd_log_hold(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);
	lp->log_refs++;
	ASSERT(lp->log_refs != 0);	/* detect reference count wraparound */
	(void) pthread_mutex_unlock(&lp->log_lock);
}
495
496 void
fmd_log_rele(fmd_log_t * lp)497 fmd_log_rele(fmd_log_t *lp)
498 {
499 (void) pthread_mutex_lock(&lp->log_lock);
500 ASSERT(lp->log_refs != 0);
501
502 if (--lp->log_refs == 0)
503 fmd_log_close(lp);
504 else
505 (void) pthread_mutex_unlock(&lp->log_lock);
506 }
507
/*
 * Append an event to the specified log.  The event's nvlist is packed into
 * XDR form and written with its timestamp as a single exacct record group.
 * For logs that participate in replay (FMD_LF_REPLAY), the group is tagged
 * EXD_GROUP_RFMA so that fmd_log_commit() can later retag it as processed.
 * If a case is specified, a nested group of cross-references to the case's
 * previously-logged events is attached: uuid/offset pairs for current-format
 * logs, or maj/min/inode/offset tuples for legacy-format logs.  On success,
 * the event becomes non-volatile and takes a reference on the log.
 */
void
fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	int err = 0;

	ea_object_t grp0, grp1, i0, i1, i2, *items;
	ea_object_t **fe = NULL;	/* uuid items to ea_free_item() below */
	size_t nvsize, easize, itsize, frsize;
	char *nvbuf, *eabuf;
	statvfs64_t stv;

	(void) pthread_mutex_lock(&ep->ev_lock);

	ASSERT(ep->ev_flags & FMD_EVF_VOLATILE);
	ASSERT(ep->ev_log == NULL);

	/* Pack the event's nvlist payload into an XDR-encoded buffer. */
	(void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR);
	nvbuf = fmd_alloc(nvsize, FMD_SLEEP);
	(void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);

	if (lp->log_flags & FMD_LF_REPLAY)
		err |= ea_set_group(&grp0, CAT_FMA_RGROUP);
	else
		err |= ea_set_group(&grp0, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0);
	err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0);
	err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize);

	if (err != 0) {
		(void) pthread_mutex_unlock(&ep->ev_lock);
		err = EFMD_LOG_EXACCT;
		goto exerr;
	}

	(void) ea_attach_to_group(&grp0, &i0);
	(void) ea_attach_to_group(&grp0, &i1);
	(void) ea_attach_to_group(&grp0, &i2);

	/*
	 * If this event has a case associated with it (i.e. it is a list),
	 * then allocate a block of ea_object_t's and fill in a group for
	 * each event saved in the case's item list.  For each such group,
	 * we attach it to grp1, which in turn will be attached to grp0.
	 * Each reference needs at most five objects: one group plus up to
	 * four items, hence the "* 5" sizing of the items array below.
	 */
	if (cp != NULL) {
		ea_object_t *egrp, *ip, **fp;
		fmd_event_impl_t *eip;
		fmd_case_item_t *cit;

		(void) ea_set_group(&grp1, CAT_FMA_GROUP);
		frsize = sizeof (ea_object_t *) * cip->ci_nitems;
		itsize = sizeof (ea_object_t) * cip->ci_nitems * 5;
		items = ip = fmd_alloc(itsize, FMD_SLEEP);

		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
			major_t maj;
			minor_t min;

			eip = (fmd_event_impl_t *)cit->cit_event;

			if (eip->ev_log == NULL)
				continue; /* event was never logged */

			maj = major(eip->ev_log->log_stat.st_dev);
			min = minor(eip->ev_log->log_stat.st_dev);

			(void) ea_set_group(ip, CAT_FMA_GROUP);
			egrp = ip++; /* first obj is group */

			/*
			 * If the event log file is in legacy format,
			 * then write the xref to the file in the legacy
			 * maj/min/inode method else write it using the
			 * file uuid.
			 */
			if (eip->ev_log->log_uuid == NULL) {
				(void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_INODE,
				    &eip->ev_log->log_stat.st_ino, 0);
				(void) ea_attach_to_group(egrp, ip++);
			} else {
				if (ea_set_item(ip, CAT_FMA_UUID,
				    eip->ev_log->log_uuid, 0) == -1) {
					err = EFMD_LOG_EXACCT;
					goto exerrcp;
				}
				/*
				 * Record each uuid item in 'fe' so it can be
				 * passed to ea_free_item() at exerrcp below;
				 * the other items are not tracked this way —
				 * presumably only the uuid item holds state
				 * that ea_free_item() must release (verify
				 * against libexacct string-item semantics).
				 */
				if (fe == NULL)
					fe = fp = fmd_zalloc(frsize, FMD_SLEEP);
				*fp++ = ip;
				(void) ea_attach_to_group(egrp, ip++);
			}
			(void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0);
			(void) ea_attach_to_group(egrp, ip++);
			(void) ea_attach_to_group(&grp1, egrp);
		}
		(void) ea_attach_to_group(&grp0, &grp1);
	}

	easize = ea_pack_object(&grp0, NULL, 0);
	eabuf = fmd_alloc(easize, FMD_SLEEP);
	(void) ea_pack_object(&grp0, eabuf, easize);

	/*
	 * Before writing the record, check to see if this would cause the free
	 * space in the filesystem to drop below our minfree threshold.  If so,
	 * don't bother attempting the write and instead pretend it failed.  As
	 * fmd(8) runs as root, it will be able to access the space "reserved"
	 * for root, and therefore can run the system out of disk space in a
	 * heavy error load situation, violating the basic design principle of
	 * fmd(8) that we don't want to make a bad situation even worse.
	 */
	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 &&
	    stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) {

		TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag));
		err = EFMD_LOG_MINFREE;

	} else if (fmd_log_write(lp, eabuf, easize) == easize) {
		TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx",
		    lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off));

		/* The event is now on stable storage: record where. */
		ep->ev_flags &= ~FMD_EVF_VOLATILE;
		ep->ev_log = lp;
		ep->ev_off = lp->log_off;
		ep->ev_len = easize;

		if (lp->log_flags & FMD_LF_REPLAY) {
			lp->log_pending++;
			ASSERT(lp->log_pending != 0);
		}

		lp->log_refs++;
		ASSERT(lp->log_refs != 0);
		lp->log_off += easize;
	} else {
		err = errno; /* save errno for fmd_error() call below */

		/*
		 * If we can't append the record, seek the file back to
		 * the original location and truncate it there in order to make
		 * sure the file is always in a sane state w.r.t. libexacct.
		 */
		(void) lseek64(lp->log_fd, lp->log_off, SEEK_SET);
		(void) ftruncate64(lp->log_fd, lp->log_off);
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
	(void) pthread_mutex_unlock(&ep->ev_lock);

	fmd_free(eabuf, easize);

exerrcp:
	/* Free the per-case xref objects; 'items' is valid iff cp != NULL. */
	if (cp != NULL) {
		if (fe != NULL) {
			ea_object_t **fp = fe;
			int i = 0;

			for (; *fp != NULL && i < cip->ci_nitems; i++)
				(void) ea_free_item(*fp++, EUP_ALLOC);
			fmd_free(fe, frsize);
		}

		fmd_free(items, itsize);
	}

exerr:
	fmd_free(nvbuf, nvsize);

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);

	/*
	 * Keep track of out-of-space errors using global statistics.  As we're
	 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged.
	 */
	if (err == ENOSPC || err == EFMD_LOG_MINFREE) {
		fmd_stat_t *sp;

		if (lp == fmd.d_errlog)
			sp = &fmd.d_stats->ds_err_enospc;
		else if (lp == fmd.d_fltlog)
			sp = &fmd.d_stats->ds_flt_enospc;
		else
			sp = &fmd.d_stats->ds_oth_enospc;

		(void) pthread_mutex_lock(&fmd.d_stats_lock);
		sp->fmds_value.ui64++;
		(void) pthread_mutex_unlock(&fmd.d_stats_lock);
	}

	if (err != 0) {
		fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n",
		    lp->log_tag, (void *)ep, fmd_strerror(err));
	}
}
712
713 /*
714 * Commit an event to the log permanently, indicating that it should not be
715 * replayed on restart. This is done by overwriting the event group's catalog
716 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()). We
717 * use pwrite64() to update the existing word directly, using somewhat guilty
718 * knowledge that exacct stores the 32-bit catalog word first for each object.
719 * Since we are overwriting an existing log location using pwrite64() and hold
720 * the event lock, we do not need to hold the log_lock during the i/o.
721 */
722 void
fmd_log_commit(fmd_log_t * lp,fmd_event_t * e)723 fmd_log_commit(fmd_log_t *lp, fmd_event_t *e)
724 {
725 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
726 ea_catalog_t c;
727 int err = 0;
728
729 if (!(lp->log_flags & FMD_LF_REPLAY))
730 return; /* log does not require replay tagging */
731
732 ASSERT(MUTEX_HELD(&ep->ev_lock));
733 ASSERT(ep->ev_log == lp && ep->ev_off != 0);
734
735 c = CAT_FMA_GROUP;
736 exacct_order32(&c);
737
738 if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) {
739 TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep));
740 ep->ev_flags &= ~FMD_EVF_REPLAY;
741
742 /*
743 * If we have committed the event, check to see if the TOC skip
744 * offset needs to be updated, and decrement the pending count.
745 */
746 (void) pthread_mutex_lock(&lp->log_lock);
747
748 if (lp->log_skip == ep->ev_off) {
749 lp->log_flags |= FMD_LF_DIRTY;
750 lp->log_skip += ep->ev_len;
751 }
752
753 ASSERT(lp->log_pending != 0);
754 lp->log_pending--;
755
756 (void) pthread_cond_broadcast(&lp->log_cv);
757 (void) pthread_mutex_unlock(&lp->log_lock);
758
759 } else {
760 fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n",
761 lp->log_tag, (void *)ep, fmd_strerror(err));
762 }
763 }
764
765 /*
766 * If we need to destroy an event and it wasn't able to be committed, we permit
767 * the owner to decommit from ever trying again. This operation decrements the
768 * pending count on the log and broadcasts to anyone waiting on log_cv.
769 */
770 void
fmd_log_decommit(fmd_log_t * lp,fmd_event_t * e)771 fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e)
772 {
773 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
774
775 if (!(lp->log_flags & FMD_LF_REPLAY))
776 return; /* log does not require replay tagging */
777
778 ASSERT(MUTEX_HELD(&ep->ev_lock));
779 ASSERT(ep->ev_log == lp);
780
781 (void) pthread_mutex_lock(&lp->log_lock);
782
783 TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep));
784 ep->ev_flags &= ~FMD_EVF_REPLAY;
785
786 ASSERT(lp->log_pending != 0);
787 lp->log_pending--;
788
789 (void) pthread_cond_broadcast(&lp->log_cv);
790 (void) pthread_mutex_unlock(&lp->log_lock);
791 }
792
793 static fmd_event_t *
fmd_log_unpack(fmd_log_t * lp,ea_object_t * grp,off64_t off)794 fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off)
795 {
796 fmd_timeval_t ftv = { -1ULL, -1ULL };
797 nvlist_t *nvl = NULL;
798
799 ea_object_t *obj;
800 char *class;
801 int err;
802
803 for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
804 switch (obj->eo_catalog) {
805 case CAT_FMA_NVLIST:
806 if ((err = nvlist_xunpack(obj->eo_item.ei_raw,
807 obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) {
808 fmd_error(EFMD_LOG_UNPACK, "failed to unpack "
809 "log nvpair: %s\n", fmd_strerror(err));
810 return (NULL);
811 }
812 break;
813
814 case CAT_FMA_TODSEC:
815 ftv.ftv_sec = obj->eo_item.ei_uint64;
816 break;
817
818 case CAT_FMA_TODNSEC:
819 ftv.ftv_nsec = obj->eo_item.ei_uint64;
820 break;
821 }
822 }
823
824 if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) {
825 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
826 "required object(s) missing from record group\n");
827 nvlist_free(nvl);
828 return (NULL);
829 }
830
831 if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
832 fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
833 "record is missing required '%s' nvpair\n", FM_CLASS);
834 nvlist_free(nvl);
835 return (NULL);
836 }
837
838 return (fmd_event_recreate(FMD_EVT_PROTOCOL,
839 &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0)));
840 }
841
842 /*
843 * Replay event(s) from the specified log by invoking the specified callback
844 * function 'func' for each event. If the log has the FMD_LF_REPLAY flag set,
845 * we replay all events after log_skip that have the FMA_RGROUP group tag.
846 * This mode is used for the error telemetry log. If the log does not have
847 * this flag set (used for ASRU logs), only the most recent event is replayed.
848 */
void
fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data)
{
	ea_object_t obj, *grp;
	ea_object_type_t type;
	ea_catalog_t c;		/* catalog tag identifying replayable groups */
	fmd_event_t *ep;
	off64_t off, skp;
	uint_t n = 0;		/* number of events replayed so far */

	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* we just created this log: never replay events */
	}

	/* Wait for any in-progress replay to finish before starting ours. */
	while (lp->log_flags & FMD_LF_BUSY)
		(void) pthread_cond_wait(&lp->log_cv, &lp->log_lock);

	if (lp->log_off == lp->log_beg) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* no records appended yet */
	}

	lp->log_flags |= FMD_LF_BUSY;
	skp = lp->log_skip;
	ea_clear(&lp->log_ea); /* resync exacct file */

	/*
	 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it
	 * is non-zero) or at log_beg. Otherwise replay from the end (log_off)
	 */
	if (lp->log_flags & FMD_LF_REPLAY) {
		off = MAX(lp->log_beg, lp->log_skip);
		c = CAT_FMA_RGROUP;
	} else {
		off = lp->log_off;
		c = CAT_FMA_GROUP;
	}

	if (lseek64(lp->log_fd, off, SEEK_SET) != off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)off);
	}

	/*
	 * If FMD_LF_REPLAY is not set, back up to the start of the previous
	 * object and make sure this object is an EO_GROUP; otherwise return.
	 */
	if (!(lp->log_flags & FMD_LF_REPLAY) &&
	    (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) {
		fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected "
		    "type %d (log may be truncated or corrupt)\n", type);
		goto out;
	}

	while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) {
		/*
		 * In non-replay mode we backed up one object, so 'off' must
		 * be adjusted down to that object's start; in replay mode,
		 * the first already-committed (FMA) group we encounter marks
		 * the new skip offset.
		 */
		if (!(lp->log_flags & FMD_LF_REPLAY))
			off -= ea_pack_object(grp, NULL, 0);
		else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP)
			skp = off; /* update skip */

		/*
		 * We temporarily drop log_lock around the call to unpack the
		 * event, hold it, and perform the callback, because these
		 * operations may try to acquire log_lock to bump log_refs.
		 * We cannot lose control because the FMD_LF_BUSY flag is set.
		 */
		(void) pthread_mutex_unlock(&lp->log_lock);

		if (grp->eo_catalog == c &&
		    (ep = fmd_log_unpack(lp, grp, off)) != NULL) {

			TRACE((FMD_DBG_LOG, "replay %s %p off %llx",
			    lp->log_tag, (void *)ep, (u_longlong_t)off));

			fmd_event_hold(ep);
			func(lp, ep, data);
			fmd_event_rele(ep);
			n++;
		}

		(void) pthread_mutex_lock(&lp->log_lock);
		off += ea_pack_object(grp, NULL, 0);
		ea_free_object(grp, EUP_ALLOC);
	}

	if (ea_error() != EXR_EOF) {
		fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at "
		    "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off,
		    fmd_ea_strerror(ea_error()));
	}

	if (n == 0)
		skp = off; /* if no replays, move skip to where we ended up */

out:
	/* Restore the append position and publish any new skip offset. */
	if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)lp->log_off);
	}

	if (skp != lp->log_skip) {
		lp->log_flags |= FMD_LF_DIRTY;
		lp->log_skip = skp;
	}

	lp->log_flags &= ~FMD_LF_BUSY;
	(void) pthread_cond_broadcast(&lp->log_cv);
	(void) pthread_mutex_unlock(&lp->log_lock);
}
961
962 void
fmd_log_update(fmd_log_t * lp)963 fmd_log_update(fmd_log_t *lp)
964 {
965 ea_object_t toc, item;
966 off64_t skip = 0;
967 size_t size;
968 void *buf;
969
970 (void) pthread_mutex_lock(&lp->log_lock);
971
972 if (lp->log_flags & FMD_LF_DIRTY) {
973 lp->log_flags &= ~FMD_LF_DIRTY;
974 skip = lp->log_skip;
975 }
976
977 (void) pthread_mutex_unlock(&lp->log_lock);
978
979 /*
980 * If the skip needs to be updated, construct a TOC record group
981 * containing the skip offset and overwrite the TOC in-place.
982 */
983 if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 &&
984 ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) {
985
986 (void) ea_attach_to_group(&toc, &item);
987 size = ea_pack_object(&toc, NULL, 0);
988 buf = fmd_alloc(size, FMD_SLEEP);
989
990 (void) ea_pack_object(&toc, buf, size);
991 ASSERT(lp->log_toc + size == lp->log_beg);
992
993 if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) {
994 TRACE((FMD_DBG_LOG, "updated skip to %llx", skip));
995 } else {
996 fmd_error(EFMD_LOG_UPDATE,
997 "failed to log_update %s", lp->log_tag);
998 }
999
1000 fmd_free(buf, size);
1001 (void) ea_free_item(&item, EUP_ALLOC);
1002 }
1003 }
1004
1005 /*
1006 * Rotate the specified log by renaming its underlying file to a staging file
1007 * that can be handed off to logadm(8) or an administrator script. If the
1008 * rename succeeds, open a new log file using the old path and return it.
1009 * Note that we are relying our caller to use some higher-level mechanism to
1010 * ensure that fmd_log_rotate() cannot be called while other threads are
1011 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used
1012 * for the global errlog and fltlog).
1013 */
fmd_log_t *
fmd_log_rotate(fmd_log_t *lp)
{
	char npath[PATH_MAX];
	fmd_log_t *nlp;

	/*
	 * The replacement log is created under a temporary "<name>+" path
	 * so that it can be renamed into place once the old log is staged.
	 */
	(void) snprintf(npath, sizeof (npath), "%s+", lp->log_name);

	/*
	 * Open new log file.
	 */
	if ((nlp = fmd_log_open("", npath, lp->log_tag)) == NULL) {
		fmd_error(EFMD_LOG_ROTATE, "failed to open %s", npath);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	/* "<name>.0-" is the staging name handed off to logadm(8). */
	(void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name);
	(void) pthread_mutex_lock(&lp->log_lock);

	/*
	 * Check for any pending commits to drain before proceeding. We can't
	 * rotate the log out if commits are pending because if we die after
	 * the log is moved aside, we won't be able to replay them on restart.
	 */
	if (lp->log_pending != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTBUSY);
		return (NULL);
	}

	/* Stage the old log file under its "<name>.0-" rotation name. */
	if (rename(lp->log_name, npath) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	/* Move the new log from its temporary "+" path to the old path. */
	if (rename(nlp->log_name, lp->log_name) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s",
		    nlp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	/*
	 * Change name of new log file
	 */
	fmd_strfree(nlp->log_name);
	nlp->log_name = fmd_strdup(lp->log_name, FMD_SLEEP);

	/*
	 * If we've rotated the log, no pending events exist so we don't have
	 * any more commits coming, and our caller should have arranged for
	 * no more calls to append. As such, we can close log_fd for good.
	 */
	if (lp->log_flags & FMD_LF_EAOPEN) {
		(void) ea_close(&lp->log_ea);
		lp->log_flags &= ~FMD_LF_EAOPEN;
	}

	(void) close(lp->log_fd);
	lp->log_fd = -1;

	(void) pthread_mutex_unlock(&lp->log_lock);
	return (nlp);
}
1088