xref: /titanic_52/usr/src/cmd/fm/fmd/common/fmd_ckpt.c (revision 5bb86dd8f405a48942aaaab3ca1f410ed7e6db4d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/mkdev.h>
31 #include <sys/stat.h>
32 
33 #include <strings.h>
34 #include <unistd.h>
35 #include <limits.h>
36 #include <fcntl.h>
37 
38 #include <fmd_module.h>
39 #include <fmd_error.h>
40 #include <fmd_alloc.h>
41 #include <fmd_case.h>
42 #include <fmd_serd.h>
43 #include <fmd_subr.h>
44 #include <fmd_conf.h>
45 #include <fmd_event.h>
46 #include <fmd_log.h>
47 #include <fmd_api.h>
48 #include <fmd_ckpt.h>
49 
50 #include <fmd.h>
51 
/*
 * Power-of-two helpers.  Both assume the alignment argument is a power of
 * two; neither one checks it.
 *
 * P2ROUNDUP(x, align) rounds x up to the next multiple of align.  The negation
 * of align is evaluated in align's own (promoted) type before it is combined
 * with x, so if align is an unsigned type narrower than x the mask is
 * zero-extended and the upper bits of the result are lost.  Pass an align that
 * is at least as wide as x.
 *
 * IS_P2ALIGNED(v, a) is true when v is a multiple of a; both operands are
 * converted to uintptr_t before the test.
 */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
54 
/*
 * The fmd_ckpt_t structure is used to manage all of the state needed by the
 * various subroutines that save and restore checkpoints.  The structure is
 * initialized using fmd_ckpt_create() (save) or fmd_ckpt_open() (restore) and
 * is destroyed by fmd_ckpt_destroy().
 *
 * When saving, ckp_size, ckp_secs and ckp_strn are first accumulated by the
 * fmd_ckpt_resv*() routines; fmd_ckpt_alloc() then allocates ckp_buf and the
 * fmd_ckpt_save*() routines fill it in by advancing ckp_ptr, ckp_secp and
 * ckp_strp.  When restoring, ckp_buf holds the entire file image and the
 * remaining pointers refer to locations inside of it.
 */
typedef struct fmd_ckpt {
	char ckp_src[PATH_MAX];	/* file that is read (open) or written (create) */
	char ckp_dst[PATH_MAX];	/* final name ckp_src is renamed to on commit */
	uchar_t *ckp_buf;	/* data buffer base address */
	fcf_hdr_t *ckp_hdr;	/* file header pointer (same address as ckp_buf) */
	uchar_t *ckp_ptr;	/* current write position in the data buffer */
	size_t ckp_size;	/* data buffer size in bytes */
	fcf_sec_t *ckp_secp;	/* section header table pointer */
	fcf_sec_t *ckp_modp;	/* section header for module (set by open) */
	uint_t ckp_secs;	/* number of sections */
	char *ckp_strs;		/* string table base pointer */
	char *ckp_strp;		/* current write position in the string table */
	size_t ckp_strn;	/* string table size in bytes */
	int ckp_fd;		/* descriptor for ckp_src, or -1 once closed */
	fmd_module_t *ckp_mp;	/* module being checkpointed or restored */
	void *ckp_arg;		/* private cursor passed to hash callbacks */
} fmd_ckpt_t;
78 
/*
 * Per-section-type description used to fill in section headers when saving
 * (fmd_ckpt_section) and to validate them when restoring (fmd_ckpt_open).
 */
typedef struct fmd_ckpt_desc {
	uint64_t secd_size;	/* minimum section size */
	uint32_t secd_entsize;	/* minimum section entry size */
	uint32_t secd_align;	/* section alignment */
} fmd_ckpt_desc_t;
84 
/*
 * Table of FCF section descriptions, indexed by section type (the name of the
 * type each row describes is given in the trailing comment).  Here we record
 * the minimum size for each section (for use during restore) and the expected
 * entry size and alignment for each section (for use during both checkpoint
 * and restore).  Rows are { secd_size, secd_entsize, secd_align }.
 */
static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
{ 0, 0, sizeof (uint8_t) },					   /* NONE */
{ 1, 0, sizeof (char) },					   /* STRTAB */
{ sizeof (fcf_module_t), 0, sizeof (uint32_t) },		   /* MODULE */
{ sizeof (fcf_case_t), 0, sizeof (uint32_t) },			   /* CASE */
{ sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	   /* BUFS */
{ 0, 0, _MAX_ALIGNMENT },					   /* BUFFER */
{ sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
{ sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
{ sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	   /* NVLISTS */
};
101 
102 static int
103 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
104 {
105 	const char *dir = mp->mod_ckpt;
106 	const char *name = mp->mod_name;
107 	mode_t mode;
108 
109 	bzero(ckp, sizeof (fmd_ckpt_t));
110 	ckp->ckp_mp = mp;
111 
112 	ckp->ckp_size = sizeof (fcf_hdr_t);
113 	ckp->ckp_strn = 1; /* for \0 */
114 
115 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
116 	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
117 
118 	(void) unlink(ckp->ckp_src);
119 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
120 	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
121 
122 	return (ckp->ckp_fd);
123 }
124 
125 /*PRINTFLIKE2*/
126 static int
127 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
128 {
129 	va_list ap;
130 
131 	va_start(ap, format);
132 	fmd_verror(EFMD_CKPT_INVAL, format, ap);
133 	va_end(ap);
134 
135 	fmd_free(ckp->ckp_buf, ckp->ckp_size);
136 	return (fmd_set_errno(EFMD_CKPT_INVAL));
137 }
138 
139 static int
140 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
141 {
142 	struct stat64 st;
143 	uint64_t seclen;
144 	uint_t i;
145 	int err;
146 
147 	bzero(ckp, sizeof (fmd_ckpt_t));
148 	ckp->ckp_mp = mp;
149 
150 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
151 	    mp->mod_ckpt, mp->mod_name);
152 
153 	if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
154 		return (-1); /* failed to open checkpoint file */
155 
156 	if (fstat64(ckp->ckp_fd, &st) == -1) {
157 		err = errno;
158 		(void) close(ckp->ckp_fd);
159 		return (fmd_set_errno(err));
160 	}
161 
162 	ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
163 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
164 	ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
165 
166 	if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
167 	    ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
168 		err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
169 		fmd_free(ckp->ckp_buf, st.st_size);
170 		(void) close(ckp->ckp_fd);
171 		return (fmd_set_errno(err));
172 	}
173 
174 	(void) close(ckp->ckp_fd);
175 	ckp->ckp_fd = -1;
176 
177 	/*
178 	 * Once we've read in a consistent copy of the FCF file and we're sure
179 	 * the header can be accessed, go through it and make sure everything
180 	 * is valid.  We also check that unused bits are zero so we can expand
181 	 * to use them safely in the future and support old files if needed.
182 	 */
183 	if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
184 	    FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
185 		return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
186 
187 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
188 		return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
189 
190 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
191 		return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
192 
193 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
194 		return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
195 		    ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
196 	}
197 
198 	for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
199 		if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
200 			return (fmd_ckpt_inval(ckp,
201 			    "bad checkpoint padding at id[%d]", i));
202 		}
203 	}
204 
205 	if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
206 		return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
207 
208 	if (ckp->ckp_hdr->fcfh_pad != 0)
209 		return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
210 
211 	if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
212 	    ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
213 		return (fmd_ckpt_inval(ckp,
214 		    "bad header and/or section size\n"));
215 	}
216 
217 	seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
218 	    (uint64_t)ckp->ckp_hdr->fcfh_secsize;
219 
220 	if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
221 	    seclen > ckp->ckp_size ||
222 	    ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
223 	    ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
224 		return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
225 
226 	if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
227 	    !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
228 		return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
229 
230 	/*
231 	 * Once the header is validated, iterate over the section headers
232 	 * ensuring that each one is valid w.r.t. offset, alignment, and size.
233 	 * We also pick up the string table pointer during this pass.
234 	 */
235 	ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
236 	ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
237 
238 	for (i = 0; i < ckp->ckp_secs; i++) {
239 		fcf_sec_t *sp = (void *)(ckp->ckp_buf +
240 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
241 
242 		const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
243 
244 		if (sp->fcfs_flags != 0) {
245 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
246 			    "section flags (0x%x)\n", i, sp->fcfs_flags));
247 		}
248 
249 		if (sp->fcfs_align & (sp->fcfs_align - 1)) {
250 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
251 			    "alignment (%u)\n", i, sp->fcfs_align));
252 		}
253 
254 		if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
255 			return (fmd_ckpt_inval(ckp, "section %u is not properly"
256 			    " aligned (offset %llu)\n", i, sp->fcfs_offset));
257 		}
258 
259 		if (sp->fcfs_entsize != 0 &&
260 		    (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
261 			return (fmd_ckpt_inval(ckp, "section %u has misaligned "
262 			    "entsize %u\n", i, sp->fcfs_entsize));
263 		}
264 
265 		if (sp->fcfs_offset > ckp->ckp_size ||
266 		    sp->fcfs_size > ckp->ckp_size ||
267 		    sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
268 		    sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
269 			return (fmd_ckpt_inval(ckp, "section %u has corrupt "
270 			    "size or offset\n", i));
271 		}
272 
273 		if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
274 		    sizeof (_fmd_ckpt_sections[0])) {
275 			return (fmd_ckpt_inval(ckp, "section %u has unknown "
276 			    "section type %u\n", i, sp->fcfs_type));
277 		}
278 
279 		if (sp->fcfs_align != dp->secd_align) {
280 			return (fmd_ckpt_inval(ckp, "section %u has align %u "
281 			    "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
282 		}
283 
284 		if (sp->fcfs_size < dp->secd_size ||
285 		    sp->fcfs_entsize < dp->secd_entsize) {
286 			return (fmd_ckpt_inval(ckp, "section %u has short "
287 			    "size or entsize\n", i));
288 		}
289 
290 		switch (sp->fcfs_type) {
291 		case FCF_SECT_STRTAB:
292 			if (ckp->ckp_strs != NULL) {
293 				return (fmd_ckpt_inval(ckp, "multiple string "
294 				    "tables are present in checkpoint file\n"));
295 			}
296 
297 			ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
298 			ckp->ckp_strn = sp->fcfs_size;
299 
300 			if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
301 				return (fmd_ckpt_inval(ckp, "string table %u "
302 				    "is missing terminating nul byte\n", i));
303 			}
304 			break;
305 
306 		case FCF_SECT_MODULE:
307 			if (ckp->ckp_modp != NULL) {
308 				return (fmd_ckpt_inval(ckp, "multiple module "
309 				    "sects are present in checkpoint file\n"));
310 			}
311 			ckp->ckp_modp = sp;
312 			break;
313 		}
314 	}
315 
316 	/*
317 	 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
318 	 * This is done to ensure that links can use index 0 as a null section.
319 	 */
320 	if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
321 	    ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
322 		return (fmd_ckpt_inval(ckp, "section 0 is not of the "
323 		    "appropriate size and/or attributes (SECT_NONE)\n"));
324 	}
325 
326 	if (ckp->ckp_modp == NULL) {
327 		return (fmd_ckpt_inval(ckp,
328 		    "no module section found in file\n"));
329 	}
330 
331 	return (0);
332 }
333 
334 static void
335 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
336 {
337 	if (ckp->ckp_buf != NULL)
338 		fmd_free(ckp->ckp_buf, ckp->ckp_size);
339 	if (ckp->ckp_fd >= 0)
340 		(void) close(ckp->ckp_fd);
341 }
342 
343 /*
344  * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
345  * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
346  * then aborts the API call and the surrounding module entry point by doing an
347  * fmd_module_abort(), which longjmps to the place where we entered the module.
348  * Depending on the type of error and conf settings, we will reset or fail.
349  */
350 /*PRINTFLIKE3*/
351 static void
352 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
353 {
354 	fmd_module_t *mp = ckp->ckp_mp;
355 	va_list ap;
356 
357 	va_start(ap, format);
358 	fmd_verror(err, format, ap);
359 	va_end(ap);
360 
361 	if (fmd_module_locked(mp))
362 		fmd_module_unlock(mp);
363 
364 	fmd_ckpt_destroy(ckp);
365 	fmd_module_abort(mp, err);
366 }
367 
368 static fcf_secidx_t
369 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
370 {
371 	const fmd_ckpt_desc_t *dp;
372 
373 	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
374 	dp = &_fmd_ckpt_sections[type];
375 
376 	ckp->ckp_ptr = (uchar_t *)
377 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
378 
379 	ckp->ckp_secp->fcfs_type = type;
380 	ckp->ckp_secp->fcfs_align = dp->secd_align;
381 	ckp->ckp_secp->fcfs_flags = 0;
382 	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
383 	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
384 	ckp->ckp_secp->fcfs_size = size;
385 
386 	/*
387 	 * If the data pointer is non-NULL, copy the data to our buffer; else
388 	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
389 	 */
390 	if (data != NULL) {
391 		bcopy(data, ckp->ckp_ptr, size);
392 		ckp->ckp_ptr += size;
393 	}
394 
395 	ckp->ckp_secp++;
396 	return (ckp->ckp_secs++);
397 }
398 
399 static fcf_stridx_t
400 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
401 {
402 	fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
403 
404 	(void) strcpy(ckp->ckp_strp, s);
405 	ckp->ckp_strp += strlen(s) + 1;
406 
407 	return (idx);
408 }
409 
410 static int
411 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
412 {
413 	/*
414 	 * We've added up all the sections by now: add two more for SECT_NONE
415 	 * and SECT_STRTAB, and add the size of the section header table and
416 	 * string table to the total size.  We know that the fcf_hdr_t is
417 	 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
418 	 * is aligned so that any section can follow it, so no extra padding
419 	 * bytes need to be allocated between any of these items.
420 	 */
421 	ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
422 	ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
423 	ckp->ckp_size += ckp->ckp_strn;
424 
425 	TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
426 	ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
427 
428 	if (ckp->ckp_buf == NULL)
429 		return (-1); /* errno is set for us */
430 
431 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
432 
433 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
434 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
435 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
436 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
437 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
438 	ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
439 	ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
440 
441 	ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
442 	ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
443 	ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
444 	ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
445 	ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
446 	ckp->ckp_hdr->fcfh_cgen = gen;
447 
448 	ckp->ckp_secs = 0; /* reset section counter for second pass */
449 	ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
450 	ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
451 	ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
452 	ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
453 
454 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
455 	return (0);
456 }
457 
458 static int
459 fmd_ckpt_commit(fmd_ckpt_t *ckp)
460 {
461 	fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
462 	size_t stroff = ckp->ckp_size - ckp->ckp_strn;
463 
464 	/*
465 	 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
466 	 * and current pointer locations all add up appropriately.  Any ASSERTs
467 	 * which trip here likely indicate an inconsistency in the code for the
468 	 * reservation pass and the buffer update pass of the FCF subroutines.
469 	 */
470 	ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
471 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
472 	ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
473 
474 	ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
475 	ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
476 	ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
477 
478 	if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
479 	    fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
480 		return (-1); /* errno is set for us */
481 
482 	ckp->ckp_fd = -1; /* fd is now closed */
483 	return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
484 }
485 
486 static void
487 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
488 {
489 	if (size != 0) {
490 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
491 		ckp->ckp_secs++;
492 	}
493 }
494 
495 static void
496 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
497 {
498 	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
499 	ckp->ckp_strn += strlen(bp->buf_name) + 1;
500 	ckp->ckp_secs++;
501 }
502 
503 static void
504 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
505 {
506 	fcf_buf_t *fcfb = ckp->ckp_arg;
507 
508 	fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
509 	fcfb->fcfb_data = fmd_ckpt_section(ckp,
510 	    bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
511 
512 	ckp->ckp_arg = fcfb + 1;
513 }
514 
515 static void
516 fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
517 {
518 	fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
519 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
520 	fmd_log_t *lp = ep->ev_log;
521 
522 	fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
523 	fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
524 	fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
525 	fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
526 	fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
527 	fcfe->fcfe_offset = ep->ev_off;
528 
529 	ckp->ckp_ptr += sizeof (fcf_event_t);
530 }
531 
532 static void
533 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
534 {
535 	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
536 	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
537 	size_t nvsize = 0;
538 
539 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
540 	fcfn->fcfn_size = (uint64_t)nvsize;
541 
542 	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
543 	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
544 
545 	ckp->ckp_ptr = (uchar_t *)
546 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
547 }
548 
549 static void
550 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
551 {
552 	fmd_ckpt_resv(ckp,
553 	    sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
554 
555 	ckp->ckp_strn += strlen(sgp->sg_name) + 1;
556 }
557 
558 static void
559 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
560 {
561 	fcf_serd_t *fcfd = ckp->ckp_arg;
562 	fcf_secidx_t evsec = FCF_SECT_NONE;
563 	fmd_serd_elem_t *sep;
564 
565 	if (sgp->sg_count != 0) {
566 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
567 		    sizeof (fcf_event_t) * sgp->sg_count);
568 
569 		for (sep = fmd_list_next(&sgp->sg_list);
570 		    sep != NULL; sep = fmd_list_next(sep))
571 			fmd_ckpt_save_event(ckp, sep->se_event);
572 	}
573 
574 	fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
575 	fcfd->fcfd_events = evsec;
576 	fcfd->fcfd_pad = 0;
577 	fcfd->fcfd_n = sgp->sg_n;
578 	fcfd->fcfd_t = sgp->sg_t;
579 
580 	ckp->ckp_arg = fcfd + 1;
581 }
582 
583 static void
584 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
585 {
586 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
587 	fmd_case_susp_t *cis;
588 	uint_t n;
589 
590 	if (cip->ci_xprt != NULL)
591 		return; /* do not checkpoint cases from remote transports */
592 
593 	n = fmd_buf_hash_count(&cip->ci_bufs);
594 	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
595 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
596 
597 	if (cip->ci_principal != NULL)
598 		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
599 
600 	fmd_ckpt_resv(ckp,
601 	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
602 
603 	if (cip->ci_nsuspects != 0)
604 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
605 
606 	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
607 
608 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
609 		size_t nvsize = 0;
610 
611 		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
612 		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
613 		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
614 	}
615 
616 	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
617 	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
618 	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
619 }
620 
621 static void
622 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
623 {
624 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
625 
626 	fmd_case_item_t *cit;
627 	fmd_case_susp_t *cis;
628 	fcf_case_t fcfc;
629 	uint_t n;
630 
631 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
632 	fcf_secidx_t evsec = FCF_SECIDX_NONE;
633 	fcf_secidx_t nvsec = FCF_SECIDX_NONE;
634 	fcf_secidx_t prsec = FCF_SECIDX_NONE;
635 
636 	if (cip->ci_xprt != NULL)
637 		return; /* do not checkpoint cases from remote transports */
638 
639 	if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
640 		size_t size = sizeof (fcf_buf_t) * n;
641 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
642 
643 		fmd_buf_hash_apply(&cip->ci_bufs,
644 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
645 
646 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
647 		fmd_free(bufs, size);
648 	}
649 
650 	if (cip->ci_principal != NULL) {
651 		prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
652 		    sizeof (fcf_event_t));
653 
654 		fmd_ckpt_save_event(ckp, cip->ci_principal);
655 	}
656 
657 	if (cip->ci_nitems != 0) {
658 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
659 		    sizeof (fcf_event_t) * cip->ci_nitems);
660 
661 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
662 			fmd_ckpt_save_event(ckp, cit->cit_event);
663 	}
664 
665 	if (cip->ci_nsuspects != 0) {
666 		nvsec = fmd_ckpt_section(ckp, NULL,
667 		    FCF_SECT_NVLISTS, cip->ci_nvsz);
668 
669 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
670 			fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
671 	}
672 
673 	fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
674 	fcfc.fcfc_bufs = bufsec;
675 	fcfc.fcfc_principal = prsec;
676 	fcfc.fcfc_events = evsec;
677 	fcfc.fcfc_suspects = nvsec;
678 
679 	switch (cip->ci_state) {
680 	case FMD_CASE_UNSOLVED:
681 		fcfc.fcfc_state = FCF_CASE_UNSOLVED;
682 		break;
683 	case FMD_CASE_SOLVED:
684 		fcfc.fcfc_state = FCF_CASE_SOLVED;
685 		break;
686 	case FMD_CASE_CLOSE_WAIT:
687 		fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
688 		break;
689 	default:
690 		fmd_panic("case %p (%s) has invalid state %u",
691 		    (void *)cp, cip->ci_uuid, cip->ci_state);
692 	}
693 
694 	(void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
695 }
696 
697 static void
698 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
699 {
700 	fmd_case_t *cp;
701 	uint_t n;
702 
703 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
704 		fmd_ckpt_resv_case(ckp, cp);
705 
706 	n = fmd_serd_hash_count(&mp->mod_serds);
707 	fmd_serd_hash_apply(&mp->mod_serds,
708 	    (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
709 	fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
710 
711 	n = fmd_buf_hash_count(&mp->mod_bufs);
712 	fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
713 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
714 
715 	fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
716 	ckp->ckp_strn += strlen(mp->mod_name) + 1;
717 	ckp->ckp_strn += strlen(mp->mod_path) + 1;
718 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
719 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
720 }
721 
722 static void
723 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
724 {
725 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
726 	fcf_module_t fcfm;
727 	fmd_case_t *cp;
728 	uint_t n;
729 
730 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
731 		fmd_ckpt_save_case(ckp, cp);
732 
733 	if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
734 		size_t size = sizeof (fcf_serd_t) * n;
735 		fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
736 
737 		fmd_serd_hash_apply(&mp->mod_serds,
738 		    (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
739 
740 		(void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
741 		fmd_free(serds, size);
742 	}
743 
744 	if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
745 		size_t size = sizeof (fcf_buf_t) * n;
746 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
747 
748 		fmd_buf_hash_apply(&mp->mod_bufs,
749 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
750 
751 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
752 		fmd_free(bufs, size);
753 	}
754 
755 	fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
756 	fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
757 	fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
758 	fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
759 	fcfm.fcfm_bufs = bufsec;
760 
761 	(void) fmd_ckpt_section(ckp, &fcfm,
762 	    FCF_SECT_MODULE, sizeof (fcf_module_t));
763 }
764 
765 void
766 fmd_ckpt_save(fmd_module_t *mp)
767 {
768 	struct stat64 st;
769 	char path[PATH_MAX];
770 	mode_t dirmode;
771 
772 	hrtime_t now = gethrtime();
773 	fmd_ckpt_t ckp;
774 	int err;
775 
776 	ASSERT(fmd_module_locked(mp));
777 
778 	/*
779 	 * If checkpointing is disabled for the module, just return.  We must
780 	 * commit the module state anyway to transition pending log events.
781 	 */
782 	if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
783 		fmd_module_commit(mp);
784 		return;
785 	}
786 
787 	if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
788 		return; /* no checkpoint is necessary for this module */
789 
790 	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
791 	    mp->mod_name, mp->mod_gen + 1));
792 
793 	/*
794 	 * If the per-module checkpoint directory isn't found or isn't of type
795 	 * directory, move aside whatever is there (if anything) and attempt
796 	 * to mkdir(2) a new module checkpoint directory.  If this fails, we
797 	 * have no choice but to abort the checkpoint and try again later.
798 	 */
799 	if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
800 		(void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
801 		(void) rename(mp->mod_ckpt, path);
802 		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
803 
804 		if (mkdir(mp->mod_ckpt, dirmode) != 0) {
805 			fmd_error(EFMD_CKPT_MKDIR,
806 			    "failed to mkdir %s", mp->mod_ckpt);
807 			return; /* return without clearing dirty bits */
808 		}
809 	}
810 
811 	/*
812 	 * Create a temporary file to write out the checkpoint into, and create
813 	 * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
814 	 * then figure out how much space will be required, and allocate it.
815 	 */
816 	if (fmd_ckpt_create(&ckp, mp) == -1) {
817 		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
818 		return;
819 	}
820 
821 	fmd_ckpt_resv_module(&ckp, mp);
822 
823 	if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
824 		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
825 		fmd_ckpt_destroy(&ckp);
826 		return;
827 	}
828 
829 	/*
830 	 * Fill in the checkpoint content, write it to disk, sync it, and then
831 	 * atomically rename it to the destination path.  If this fails, we
832 	 * have no choice but to leave all our dirty bits set and return.
833 	 */
834 	fmd_ckpt_save_module(&ckp, mp);
835 	err = fmd_ckpt_commit(&ckp);
836 	fmd_ckpt_destroy(&ckp);
837 
838 	if (err != 0) {
839 		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
840 		return; /* return without clearing dirty bits */
841 	}
842 
843 	fmd_module_commit(mp);
844 	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
845 
846 	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
847 	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
848 
849 	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
850 	    mp->mod_name, mp->mod_gen);
851 }
852 
853 /*
854  * Utility function to retrieve a pointer to a section's header and verify that
855  * it is of the expected type or it is a FCF_SECT_NONE reference.
856  */
857 static const fcf_sec_t *
858 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
859 {
860 	const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
861 	    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
862 
863 	return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
864 	    sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
865 }
866 
867 /*
868  * Utility function to retrieve the data pointer for a particular section.  The
869  * validity of the header values has already been checked by fmd_ckpt_open().
870  */
871 static const void *
872 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
873 {
874 	return (ckp->ckp_buf + sp->fcfs_offset);
875 }
876 
877 /*
878  * Utility function to retrieve the end of the data region for a particular
879  * section.  The validity of this value has been confirmed by fmd_ckpt_open().
880  */
881 static const void *
882 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
883 {
884 	return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
885 }
886 
887 /*
888  * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
889  * index is valid, the string data is returned; otherwise 'defstr' is returned.
890  */
891 static const char *
892 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
893 {
894 	return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
895 }
896 
897 static void
898 fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
899     void (*func)(void *, fmd_event_t *), void *arg)
900 {
901 	const fcf_event_t *fcfe;
902 	const fcf_sec_t *sp;
903 	fmd_timeval_t ftv;
904 	fmd_log_t *lp, *errlp;
905 	uint_t i, n;
906 	uint32_t e_maj, e_min;
907 	uint64_t e_ino;
908 
909 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
910 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
911 		    "invalid link to section %u: expected events\n", sid);
912 	}
913 
914 	if (sp->fcfs_size == 0)
915 		return; /* empty events section or type none */
916 
917 	fcfe = fmd_ckpt_dataptr(ckp, sp);
918 	n = sp->fcfs_size / sp->fcfs_entsize;
919 
920 	/*
921 	 * Hold the reader lock on log pointers to block log rotation during
922 	 * the section restore so that we can safely insert refs to d_errlog.
923 	 */
924 	(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
925 	errlp = fmd.d_errlog;
926 
927 	e_maj = major(errlp->log_stat.st_dev);
928 	e_min = minor(errlp->log_stat.st_dev);
929 	e_ino = errlp->log_stat.st_ino;
930 
931 	for (i = 0; i < n; i++) {
932 		fmd_event_t *ep;
933 
934 		ftv.ftv_sec = fcfe->fcfe_todsec;
935 		ftv.ftv_nsec = fcfe->fcfe_todnsec;
936 
937 		if (e_ino == fcfe->fcfe_inode &&
938 		    e_maj == fcfe->fcfe_major &&
939 		    e_min == fcfe->fcfe_minor)
940 			lp = errlp;
941 		else
942 			lp = NULL;
943 
944 		ep = fmd_event_recreate(FMD_EVT_PROTOCOL,
945 		    &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0);
946 		fmd_event_hold(ep);
947 		func(arg, ep);
948 		fmd_event_rele(ep);
949 
950 		fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
951 	}
952 
953 	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
954 }
955 
956 static int
957 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
958 {
959 	const fcf_nvl_t *fcfn, *endn;
960 	const fcf_sec_t *sp;
961 	nvlist_t *nvl;
962 	int err, i;
963 
964 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
965 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
966 		    "invalid link to section %u: expected nvlists\n", sid);
967 	}
968 
969 	fcfn = fmd_ckpt_dataptr(ckp, sp);
970 	endn = fmd_ckpt_datalim(ckp, sp);
971 
972 	for (i = 0; fcfn < endn; i++) {
973 		char *data = (char *)fcfn + sp->fcfs_entsize;
974 		size_t size = (size_t)fcfn->fcfn_size;
975 
976 		if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
977 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
978 			    "size %u exceeds buffer\n", sid, i, size);
979 		}
980 
981 		if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
982 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
983 			    "unpack nvlist %u [%d]: %s\n", sid, i,
984 			    fmd_strerror(err));
985 		}
986 
987 		fmd_case_insert_suspect(cp, nvl);
988 
989 		size = sp->fcfs_entsize + fcfn->fcfn_size;
990 		size = P2ROUNDUP(size, sizeof (uint64_t));
991 		fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
992 	}
993 
994 	return (i);
995 }
996 
997 static void
998 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
999     fmd_case_t *cp, fcf_secidx_t sid)
1000 {
1001 	const fcf_sec_t *sp, *dsp;
1002 	const fcf_buf_t *fcfb;
1003 	uint_t i, n;
1004 
1005 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
1006 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1007 		    "invalid link to section %u: expected bufs\n", sid);
1008 	}
1009 
1010 	if (sp->fcfs_size == 0)
1011 		return; /* empty events section or type none */
1012 
1013 	fcfb = fmd_ckpt_dataptr(ckp, sp);
1014 	n = sp->fcfs_size / sp->fcfs_entsize;
1015 
1016 	for (i = 0; i < n; i++) {
1017 		dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1018 
1019 		if (dsp == NULL) {
1020 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1021 			    "buffer link %u\n", sid, fcfb->fcfb_data);
1022 		}
1023 
1024 		fmd_buf_write((fmd_hdl_t *)mp, cp,
1025 		    fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1026 		    ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1027 
1028 		fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1029 	}
1030 }
1031 
1032 static void
1033 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1034 {
1035 	const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1036 	const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1037 	fmd_case_t *cp;
1038 	int n;
1039 
1040 	if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
1041 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1042 		    "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1043 	}
1044 
1045 	fmd_module_lock(mp);
1046 
1047 	if ((cp = fmd_case_recreate(mp, NULL,
1048 	    FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
1049 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1050 		    "duplicate case uuid: %s\n", uuid);
1051 	}
1052 
1053 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1054 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
1055 
1056 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1057 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
1058 
1059 	n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1060 
1061 	if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1062 		fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1063 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
1064 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
1065 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
1066 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1067 
1068 	fmd_module_unlock(mp);
1069 	fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1070 }
1071 
1072 static void
1073 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1074 {
1075 	const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1076 	uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1077 	const fcf_sec_t *esp;
1078 	const char *s;
1079 
1080 	for (i = 0; i < n; i++) {
1081 		esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1082 
1083 		if (esp == NULL) {
1084 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1085 			    "invalid events link %u\n", fcfd->fcfd_events);
1086 		}
1087 
1088 		if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1089 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1090 			    "serd name %u is corrupt\n", fcfd->fcfd_name);
1091 		}
1092 
1093 		fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1094 		fmd_module_lock(mp);
1095 
1096 		fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1097 		    (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
1098 		    fmd_serd_eng_lookup(&mp->mod_serds, s));
1099 
1100 		fmd_module_unlock(mp);
1101 		fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1102 	}
1103 }
1104 
1105 static void
1106 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1107 {
1108 	const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1109 	const fcf_sec_t *sp;
1110 	uint_t i;
1111 
1112 	if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1113 	    strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1114 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1115 		    "checkpoint is not for module %s\n", mp->mod_name);
1116 	}
1117 
1118 	for (i = 0; i < ckp->ckp_secs; i++) {
1119 		sp = (void *)(ckp->ckp_buf +
1120 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1121 
1122 		switch (sp->fcfs_type) {
1123 		case FCF_SECT_CASE:
1124 			fmd_ckpt_restore_case(ckp, mp, sp);
1125 			break;
1126 		case FCF_SECT_SERD:
1127 			fmd_ckpt_restore_serd(ckp, mp, sp);
1128 			break;
1129 		}
1130 	}
1131 
1132 	fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1133 	mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1134 }
1135 
1136 /*
1137  * Restore a checkpoint for the specified module.  Any errors which occur
1138  * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1139  * either of which will automatically unlock the module and trigger an abort.
1140  */
1141 void
1142 fmd_ckpt_restore(fmd_module_t *mp)
1143 {
1144 	fmd_ckpt_t ckp;
1145 
1146 	if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1147 		return; /* never restore checkpoints for this module */
1148 
1149 	TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1150 
1151 	if (fmd_ckpt_open(&ckp, mp) == -1) {
1152 		if (errno != ENOENT)
1153 			fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1154 		TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1155 		return;
1156 	}
1157 
1158 	ASSERT(!fmd_module_locked(mp));
1159 	fmd_ckpt_restore_module(&ckp, mp);
1160 	fmd_ckpt_destroy(&ckp);
1161 	fmd_module_clrdirty(mp);
1162 
1163 	TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1164 	fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1165 }
1166 
1167 /*
1168  * Delete the module's checkpoint file.  This is used by the ckpt.zero property
1169  * code or by the fmadm reset RPC service path to force a checkpoint delete.
1170  */
1171 void
1172 fmd_ckpt_delete(fmd_module_t *mp)
1173 {
1174 	char path[PATH_MAX];
1175 
1176 	(void) snprintf(path, sizeof (path),
1177 	    "%s/%s", mp->mod_ckpt, mp->mod_name);
1178 
1179 	TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1180 
1181 	if (unlink(path) != 0 && errno != ENOENT)
1182 		fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1183 }
1184 
1185 /*
1186  * Move aside the module's checkpoint file if checkpoint restore has failed.
1187  * We rename the file rather than deleting it in the hopes that someone might
1188  * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1189  */
1190 void
1191 fmd_ckpt_rename(fmd_module_t *mp)
1192 {
1193 	char src[PATH_MAX], dst[PATH_MAX];
1194 
1195 	(void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1196 	(void) snprintf(dst, sizeof (dst), "%s-", src);
1197 
1198 	TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1199 
1200 	if (rename(src, dst) != 0 && errno != ENOENT)
1201 		fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1202 }
1203