xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_ckpt.c (revision 700c902c445eb3882848aaddc19d13638818cfd6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/mkdev.h>
32 #include <sys/stat.h>
33 
34 #include <strings.h>
35 #include <unistd.h>
36 #include <limits.h>
37 #include <fcntl.h>
38 
39 #include <fmd_module.h>
40 #include <fmd_error.h>
41 #include <fmd_alloc.h>
42 #include <fmd_case.h>
43 #include <fmd_serd.h>
44 #include <fmd_subr.h>
45 #include <fmd_conf.h>
46 #include <fmd_event.h>
47 #include <fmd_log.h>
48 #include <fmd_api.h>
49 #include <fmd_ckpt.h>
50 
51 #include <fmd.h>
52 
/*
 * Power-of-two arithmetic helpers.  The alignment argument must be a power
 * of two.  P2ROUNDUP() yields x rounded up to the next multiple of align
 * (or x itself if it is already a multiple); both operands should have the
 * same width, because the negated alignment is used as a mask over x.
 * IS_P2ALIGNED() is non-zero when v is a multiple of a.
 */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
#define	IS_P2ALIGNED(v, a)	(((uintptr_t)(v) & ((uintptr_t)(a) - 1)) == 0)
55 
56 /*
57  * The fmd_ckpt_t structure is used to manage all of the state needed by the
58  * various subroutines that save and restore checkpoints.  The structure is
59  * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
60  * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
61  */
62 typedef struct fmd_ckpt {
63 	char ckp_src[PATH_MAX];	/* ckpt input or output filename */
64 	char ckp_dst[PATH_MAX];	/* ckpt rename filename */
65 	uchar_t *ckp_buf;	/* data buffer base address */
66 	fcf_hdr_t *ckp_hdr;	/* file header pointer */
67 	uchar_t *ckp_ptr;	/* data buffer pointer */
68 	size_t ckp_size;	/* data buffer size */
69 	fcf_sec_t *ckp_secp;	/* section header table pointer */
70 	fcf_sec_t *ckp_modp;	/* section header for module */
71 	uint_t ckp_secs;	/* number of sections */
72 	char *ckp_strs;		/* string table base pointer */
73 	char *ckp_strp;		/* string table pointer */
74 	size_t ckp_strn;	/* string table size */
75 	int ckp_fd;		/* output descriptor */
76 	fmd_module_t *ckp_mp;	/* checkpoint module */
77 	void *ckp_arg;		/* private arg for callbacks */
78 } fmd_ckpt_t;
79 
/*
 * Per-section-type layout requirements; one entry of _fmd_ckpt_sections[].
 */
typedef struct fmd_ckpt_desc {
	uint64_t secd_size;	/* smallest acceptable section size */
	uint32_t secd_entsize;	/* smallest acceptable entry size */
	uint32_t secd_align;	/* required section alignment */
} fmd_ckpt_desc_t;
85 
86 /*
87  * Table of FCF section descriptions.  Here we record the minimum size for each
88  * section (for use during restore) and the expected entry size and alignment
89  * for each section (for use during both checkpoint and restore).
90  */
91 static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
92 { 0, 0, sizeof (uint8_t) },					   /* NONE */
93 { 1, 0, sizeof (char) },					   /* STRTAB */
94 { sizeof (fcf_module_t), 0, sizeof (uint32_t) },		   /* MODULE */
95 { sizeof (fcf_case_t), 0, sizeof (uint32_t) },			   /* CASE */
96 { sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	   /* BUFS */
97 { 0, 0, _MAX_ALIGNMENT },					   /* BUFFER */
98 { sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
99 { sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
100 { sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	   /* NVLISTS */
101 };
102 
103 static int
104 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
105 {
106 	const char *dir = mp->mod_ckpt;
107 	const char *name = mp->mod_name;
108 	mode_t mode;
109 
110 	bzero(ckp, sizeof (fmd_ckpt_t));
111 	ckp->ckp_mp = mp;
112 
113 	ckp->ckp_size = sizeof (fcf_hdr_t);
114 	ckp->ckp_strn = 1; /* for \0 */
115 
116 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
117 	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
118 
119 	(void) unlink(ckp->ckp_src);
120 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
121 	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
122 
123 	return (ckp->ckp_fd);
124 }
125 
126 /*PRINTFLIKE2*/
127 static int
128 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
129 {
130 	va_list ap;
131 
132 	va_start(ap, format);
133 	fmd_verror(EFMD_CKPT_INVAL, format, ap);
134 	va_end(ap);
135 
136 	fmd_free(ckp->ckp_buf, ckp->ckp_size);
137 	return (fmd_set_errno(EFMD_CKPT_INVAL));
138 }
139 
140 static int
141 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
142 {
143 	struct stat64 st;
144 	uint64_t seclen;
145 	uint_t i;
146 	int err;
147 
148 	bzero(ckp, sizeof (fmd_ckpt_t));
149 	ckp->ckp_mp = mp;
150 
151 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
152 	    mp->mod_ckpt, mp->mod_name);
153 
154 	if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
155 		return (-1); /* failed to open checkpoint file */
156 
157 	if (fstat64(ckp->ckp_fd, &st) == -1) {
158 		err = errno;
159 		(void) close(ckp->ckp_fd);
160 		return (fmd_set_errno(err));
161 	}
162 
163 	ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
164 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
165 	ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
166 
167 	if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
168 	    ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
169 		err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
170 		fmd_free(ckp->ckp_buf, st.st_size);
171 		(void) close(ckp->ckp_fd);
172 		return (fmd_set_errno(err));
173 	}
174 
175 	(void) close(ckp->ckp_fd);
176 	ckp->ckp_fd = -1;
177 
178 	/*
179 	 * Once we've read in a consistent copy of the FCF file and we're sure
180 	 * the header can be accessed, go through it and make sure everything
181 	 * is valid.  We also check that unused bits are zero so we can expand
182 	 * to use them safely in the future and support old files if needed.
183 	 */
184 	if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
185 	    FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
186 		return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
187 
188 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
189 		return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
190 
191 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
192 		return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
193 
194 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
195 		return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
196 		    ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
197 	}
198 
199 	for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
200 		if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
201 			return (fmd_ckpt_inval(ckp,
202 			    "bad checkpoint padding at id[%d]", i));
203 		}
204 	}
205 
206 	if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
207 		return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
208 
209 	if (ckp->ckp_hdr->fcfh_pad != 0)
210 		return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
211 
212 	if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
213 	    ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
214 		return (fmd_ckpt_inval(ckp,
215 		    "bad header and/or section size\n"));
216 	}
217 
218 	seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
219 	    (uint64_t)ckp->ckp_hdr->fcfh_secsize;
220 
221 	if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
222 	    seclen > ckp->ckp_size ||
223 	    ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
224 	    ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
225 		return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
226 
227 	if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
228 	    !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
229 		return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
230 
231 	/*
232 	 * Once the header is validated, iterate over the section headers
233 	 * ensuring that each one is valid w.r.t. offset, alignment, and size.
234 	 * We also pick up the string table pointer during this pass.
235 	 */
236 	ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
237 	ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
238 
239 	for (i = 0; i < ckp->ckp_secs; i++) {
240 		fcf_sec_t *sp = (void *)(ckp->ckp_buf +
241 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
242 
243 		const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
244 
245 		if (sp->fcfs_flags != 0) {
246 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
247 			    "section flags (0x%x)\n", i, sp->fcfs_flags));
248 		}
249 
250 		if (sp->fcfs_align & (sp->fcfs_align - 1)) {
251 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
252 			    "alignment (%u)\n", i, sp->fcfs_align));
253 		}
254 
255 		if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
256 			return (fmd_ckpt_inval(ckp, "section %u is not properly"
257 			    " aligned (offset %llu)\n", i, sp->fcfs_offset));
258 		}
259 
260 		if (sp->fcfs_entsize != 0 &&
261 		    (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
262 			return (fmd_ckpt_inval(ckp, "section %u has misaligned "
263 			    "entsize %u\n", i, sp->fcfs_entsize));
264 		}
265 
266 		if (sp->fcfs_offset > ckp->ckp_size ||
267 		    sp->fcfs_size > ckp->ckp_size ||
268 		    sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
269 		    sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
270 			return (fmd_ckpt_inval(ckp, "section %u has corrupt "
271 			    "size or offset\n", i));
272 		}
273 
274 		if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
275 		    sizeof (_fmd_ckpt_sections[0])) {
276 			return (fmd_ckpt_inval(ckp, "section %u has unknown "
277 			    "section type %u\n", i, sp->fcfs_type));
278 		}
279 
280 		if (sp->fcfs_align != dp->secd_align) {
281 			return (fmd_ckpt_inval(ckp, "section %u has align %u "
282 			    "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
283 		}
284 
285 		if (sp->fcfs_size < dp->secd_size ||
286 		    sp->fcfs_entsize < dp->secd_entsize) {
287 			return (fmd_ckpt_inval(ckp, "section %u has short "
288 			    "size or entsize\n", i));
289 		}
290 
291 		switch (sp->fcfs_type) {
292 		case FCF_SECT_STRTAB:
293 			if (ckp->ckp_strs != NULL) {
294 				return (fmd_ckpt_inval(ckp, "multiple string "
295 				    "tables are present in checkpoint file\n"));
296 			}
297 
298 			ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
299 			ckp->ckp_strn = sp->fcfs_size;
300 
301 			if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
302 				return (fmd_ckpt_inval(ckp, "string table %u "
303 				    "is missing terminating nul byte\n", i));
304 			}
305 			break;
306 
307 		case FCF_SECT_MODULE:
308 			if (ckp->ckp_modp != NULL) {
309 				return (fmd_ckpt_inval(ckp, "multiple module "
310 				    "sects are present in checkpoint file\n"));
311 			}
312 			ckp->ckp_modp = sp;
313 			break;
314 		}
315 	}
316 
317 	/*
318 	 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
319 	 * This is done to ensure that links can use index 0 as a null section.
320 	 */
321 	if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
322 	    ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
323 		return (fmd_ckpt_inval(ckp, "section 0 is not of the "
324 		    "appropriate size and/or attributes (SECT_NONE)\n"));
325 	}
326 
327 	if (ckp->ckp_modp == NULL) {
328 		return (fmd_ckpt_inval(ckp,
329 		    "no module section found in file\n"));
330 	}
331 
332 	return (0);
333 }
334 
335 static void
336 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
337 {
338 	if (ckp->ckp_buf != NULL)
339 		fmd_free(ckp->ckp_buf, ckp->ckp_size);
340 	if (ckp->ckp_fd >= 0)
341 		(void) close(ckp->ckp_fd);
342 }
343 
344 /*
345  * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
346  * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
347  * then aborts the API call and the surrounding module entry point by doing an
348  * fmd_module_abort(), which longjmps to the place where we entered the module.
349  * Depending on the type of error and conf settings, we will reset or fail.
350  */
351 /*PRINTFLIKE3*/
352 static void
353 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
354 {
355 	fmd_module_t *mp = ckp->ckp_mp;
356 	va_list ap;
357 
358 	va_start(ap, format);
359 	fmd_verror(err, format, ap);
360 	va_end(ap);
361 
362 	if (fmd_module_locked(mp))
363 		fmd_module_unlock(mp);
364 
365 	fmd_ckpt_destroy(ckp);
366 	fmd_module_abort(mp, err);
367 }
368 
369 static fcf_secidx_t
370 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
371 {
372 	const fmd_ckpt_desc_t *dp;
373 
374 	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
375 	dp = &_fmd_ckpt_sections[type];
376 
377 	ckp->ckp_ptr = (uchar_t *)
378 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
379 
380 	ckp->ckp_secp->fcfs_type = type;
381 	ckp->ckp_secp->fcfs_align = dp->secd_align;
382 	ckp->ckp_secp->fcfs_flags = 0;
383 	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
384 	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
385 	ckp->ckp_secp->fcfs_size = size;
386 
387 	/*
388 	 * If the data pointer is non-NULL, copy the data to our buffer; else
389 	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
390 	 */
391 	if (data != NULL) {
392 		bcopy(data, ckp->ckp_ptr, size);
393 		ckp->ckp_ptr += size;
394 	}
395 
396 	ckp->ckp_secp++;
397 	return (ckp->ckp_secs++);
398 }
399 
400 static fcf_stridx_t
401 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
402 {
403 	fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
404 
405 	(void) strcpy(ckp->ckp_strp, s);
406 	ckp->ckp_strp += strlen(s) + 1;
407 
408 	return (idx);
409 }
410 
411 static int
412 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
413 {
414 	/*
415 	 * We've added up all the sections by now: add two more for SECT_NONE
416 	 * and SECT_STRTAB, and add the size of the section header table and
417 	 * string table to the total size.  We know that the fcf_hdr_t is
418 	 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
419 	 * is aligned so that any section can follow it, so no extra padding
420 	 * bytes need to be allocated between any of these items.
421 	 */
422 	ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
423 	ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
424 	ckp->ckp_size += ckp->ckp_strn;
425 
426 	TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
427 	ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
428 
429 	if (ckp->ckp_buf == NULL)
430 		return (-1); /* errno is set for us */
431 
432 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
433 
434 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
435 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
436 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
437 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
438 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
439 	ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
440 	ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
441 
442 	ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
443 	ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
444 	ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
445 	ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
446 	ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
447 	ckp->ckp_hdr->fcfh_cgen = gen;
448 
449 	ckp->ckp_secs = 0; /* reset section counter for second pass */
450 	ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
451 	ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
452 	ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
453 	ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
454 
455 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
456 	return (0);
457 }
458 
459 static int
460 fmd_ckpt_commit(fmd_ckpt_t *ckp)
461 {
462 	fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
463 	size_t stroff = ckp->ckp_size - ckp->ckp_strn;
464 
465 	/*
466 	 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
467 	 * and current pointer locations all add up appropriately.  Any ASSERTs
468 	 * which trip here likely indicate an inconsistency in the code for the
469 	 * reservation pass and the buffer update pass of the FCF subroutines.
470 	 */
471 	ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
472 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
473 	ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
474 
475 	ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
476 	ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
477 	ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
478 
479 	if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
480 	    fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
481 		return (-1); /* errno is set for us */
482 
483 	ckp->ckp_fd = -1; /* fd is now closed */
484 	return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
485 }
486 
487 static void
488 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
489 {
490 	if (size != 0) {
491 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
492 		ckp->ckp_secs++;
493 	}
494 }
495 
496 static void
497 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
498 {
499 	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
500 	ckp->ckp_strn += strlen(bp->buf_name) + 1;
501 	ckp->ckp_secs++;
502 }
503 
504 static void
505 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
506 {
507 	fcf_buf_t *fcfb = ckp->ckp_arg;
508 
509 	fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
510 	fcfb->fcfb_data = fmd_ckpt_section(ckp,
511 	    bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
512 
513 	ckp->ckp_arg = fcfb + 1;
514 }
515 
516 static void
517 fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
518 {
519 	fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
520 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
521 	fmd_log_t *lp = ep->ev_log;
522 
523 	fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
524 	fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
525 	fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
526 	fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
527 	fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
528 	fcfe->fcfe_offset = ep->ev_off;
529 
530 	ckp->ckp_ptr += sizeof (fcf_event_t);
531 }
532 
533 static void
534 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
535 {
536 	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
537 	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
538 	size_t nvsize = 0;
539 
540 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
541 	fcfn->fcfn_size = (uint64_t)nvsize;
542 
543 	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
544 	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
545 
546 	ckp->ckp_ptr = (uchar_t *)
547 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
548 }
549 
550 static void
551 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
552 {
553 	fmd_ckpt_resv(ckp,
554 	    sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
555 
556 	ckp->ckp_strn += strlen(sgp->sg_name) + 1;
557 }
558 
559 static void
560 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
561 {
562 	fcf_serd_t *fcfd = ckp->ckp_arg;
563 	fcf_secidx_t evsec = FCF_SECT_NONE;
564 	fmd_serd_elem_t *sep;
565 
566 	if (sgp->sg_count != 0) {
567 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
568 		    sizeof (fcf_event_t) * sgp->sg_count);
569 
570 		for (sep = fmd_list_next(&sgp->sg_list);
571 		    sep != NULL; sep = fmd_list_next(sep))
572 			fmd_ckpt_save_event(ckp, sep->se_event);
573 	}
574 
575 	fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
576 	fcfd->fcfd_events = evsec;
577 	fcfd->fcfd_pad = 0;
578 	fcfd->fcfd_n = sgp->sg_n;
579 	fcfd->fcfd_t = sgp->sg_t;
580 
581 	ckp->ckp_arg = fcfd + 1;
582 }
583 
584 static void
585 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
586 {
587 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
588 	fmd_case_susp_t *cis;
589 	uint_t n;
590 
591 	if (cip->ci_xprt != NULL)
592 		return; /* do not checkpoint cases from remote transports */
593 
594 	n = fmd_buf_hash_count(&cip->ci_bufs);
595 	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
596 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
597 
598 	if (cip->ci_principal != NULL)
599 		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
600 
601 	fmd_ckpt_resv(ckp,
602 	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
603 
604 	if (cip->ci_nsuspects != 0)
605 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
606 
607 	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
608 
609 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
610 		size_t nvsize = 0;
611 
612 		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
613 		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
614 		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
615 	}
616 
617 	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
618 	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
619 	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
620 }
621 
622 static void
623 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
624 {
625 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
626 
627 	fmd_case_item_t *cit;
628 	fmd_case_susp_t *cis;
629 	fcf_case_t fcfc;
630 	uint_t n;
631 
632 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
633 	fcf_secidx_t evsec = FCF_SECIDX_NONE;
634 	fcf_secidx_t nvsec = FCF_SECIDX_NONE;
635 	fcf_secidx_t prsec = FCF_SECIDX_NONE;
636 
637 	if (cip->ci_xprt != NULL)
638 		return; /* do not checkpoint cases from remote transports */
639 
640 	if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
641 		size_t size = sizeof (fcf_buf_t) * n;
642 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
643 
644 		fmd_buf_hash_apply(&cip->ci_bufs,
645 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
646 
647 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
648 		fmd_free(bufs, size);
649 	}
650 
651 	if (cip->ci_principal != NULL) {
652 		prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
653 		    sizeof (fcf_event_t));
654 
655 		fmd_ckpt_save_event(ckp, cip->ci_principal);
656 	}
657 
658 	if (cip->ci_nitems != 0) {
659 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
660 		    sizeof (fcf_event_t) * cip->ci_nitems);
661 
662 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
663 			fmd_ckpt_save_event(ckp, cit->cit_event);
664 	}
665 
666 	if (cip->ci_nsuspects != 0) {
667 		nvsec = fmd_ckpt_section(ckp, NULL,
668 		    FCF_SECT_NVLISTS, cip->ci_nvsz);
669 
670 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
671 			fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
672 	}
673 
674 	fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
675 	fcfc.fcfc_bufs = bufsec;
676 	fcfc.fcfc_principal = prsec;
677 	fcfc.fcfc_events = evsec;
678 	fcfc.fcfc_suspects = nvsec;
679 
680 	switch (cip->ci_state) {
681 	case FMD_CASE_UNSOLVED:
682 		fcfc.fcfc_state = FCF_CASE_UNSOLVED;
683 		break;
684 	case FMD_CASE_SOLVED:
685 		fcfc.fcfc_state = FCF_CASE_SOLVED;
686 		break;
687 	case FMD_CASE_CLOSE_WAIT:
688 		fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
689 		break;
690 	default:
691 		fmd_panic("case %p (%s) has invalid state %u",
692 		    (void *)cp, cip->ci_uuid, cip->ci_state);
693 	}
694 
695 	(void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
696 }
697 
698 static void
699 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
700 {
701 	fmd_case_t *cp;
702 	uint_t n;
703 
704 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
705 		fmd_ckpt_resv_case(ckp, cp);
706 
707 	n = fmd_serd_hash_count(&mp->mod_serds);
708 	fmd_serd_hash_apply(&mp->mod_serds,
709 	    (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
710 	fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
711 
712 	n = fmd_buf_hash_count(&mp->mod_bufs);
713 	fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
714 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
715 
716 	fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
717 	ckp->ckp_strn += strlen(mp->mod_name) + 1;
718 	ckp->ckp_strn += strlen(mp->mod_path) + 1;
719 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
720 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
721 }
722 
723 static void
724 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
725 {
726 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
727 	fcf_module_t fcfm;
728 	fmd_case_t *cp;
729 	uint_t n;
730 
731 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
732 		fmd_ckpt_save_case(ckp, cp);
733 
734 	if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
735 		size_t size = sizeof (fcf_serd_t) * n;
736 		fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
737 
738 		fmd_serd_hash_apply(&mp->mod_serds,
739 		    (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
740 
741 		(void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
742 		fmd_free(serds, size);
743 	}
744 
745 	if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
746 		size_t size = sizeof (fcf_buf_t) * n;
747 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
748 
749 		fmd_buf_hash_apply(&mp->mod_bufs,
750 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
751 
752 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
753 		fmd_free(bufs, size);
754 	}
755 
756 	fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
757 	fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
758 	fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
759 	fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
760 	fcfm.fcfm_bufs = bufsec;
761 
762 	(void) fmd_ckpt_section(ckp, &fcfm,
763 	    FCF_SECT_MODULE, sizeof (fcf_module_t));
764 }
765 
766 void
767 fmd_ckpt_save(fmd_module_t *mp)
768 {
769 	struct stat64 st;
770 	char path[PATH_MAX];
771 	mode_t dirmode;
772 
773 	hrtime_t now = gethrtime();
774 	fmd_ckpt_t ckp;
775 	int err;
776 
777 	ASSERT(fmd_module_locked(mp));
778 
779 	/*
780 	 * If checkpointing is disabled for the module, just return.  We must
781 	 * commit the module state anyway to transition pending log events.
782 	 */
783 	if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
784 		fmd_module_commit(mp);
785 		return;
786 	}
787 
788 	if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
789 		return; /* no checkpoint is necessary for this module */
790 
791 	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
792 	    mp->mod_name, mp->mod_gen + 1));
793 
794 	/*
795 	 * If the per-module checkpoint directory isn't found or isn't of type
796 	 * directory, move aside whatever is there (if anything) and attempt
797 	 * to mkdir(2) a new module checkpoint directory.  If this fails, we
798 	 * have no choice but to abort the checkpoint and try again later.
799 	 */
800 	if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
801 		(void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
802 		(void) rename(mp->mod_ckpt, path);
803 		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
804 
805 		if (mkdir(mp->mod_ckpt, dirmode) != 0) {
806 			fmd_error(EFMD_CKPT_MKDIR,
807 			    "failed to mkdir %s", mp->mod_ckpt);
808 			return; /* return without clearing dirty bits */
809 		}
810 	}
811 
812 	/*
813 	 * Create a temporary file to write out the checkpoint into, and create
814 	 * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
815 	 * then figure out how much space will be required, and allocate it.
816 	 */
817 	if (fmd_ckpt_create(&ckp, mp) == -1) {
818 		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
819 		return;
820 	}
821 
822 	fmd_ckpt_resv_module(&ckp, mp);
823 
824 	if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
825 		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
826 		fmd_ckpt_destroy(&ckp);
827 		return;
828 	}
829 
830 	/*
831 	 * Fill in the checkpoint content, write it to disk, sync it, and then
832 	 * atomically rename it to the destination path.  If this fails, we
833 	 * have no choice but to leave all our dirty bits set and return.
834 	 */
835 	fmd_ckpt_save_module(&ckp, mp);
836 	err = fmd_ckpt_commit(&ckp);
837 	fmd_ckpt_destroy(&ckp);
838 
839 	if (err != 0) {
840 		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
841 		return; /* return without clearing dirty bits */
842 	}
843 
844 	fmd_module_commit(mp);
845 	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
846 
847 	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
848 	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
849 
850 	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
851 	    mp->mod_name, mp->mod_gen);
852 }
853 
854 /*
855  * Utility function to retrieve a pointer to a section's header and verify that
856  * it is of the expected type or it is a FCF_SECT_NONE reference.
857  */
858 static const fcf_sec_t *
859 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
860 {
861 	const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
862 	    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
863 
864 	return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
865 	    sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
866 }
867 
868 /*
869  * Utility function to retrieve the data pointer for a particular section.  The
870  * validity of the header values has already been checked by fmd_ckpt_open().
871  */
872 static const void *
873 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
874 {
875 	return (ckp->ckp_buf + sp->fcfs_offset);
876 }
877 
878 /*
879  * Utility function to retrieve the end of the data region for a particular
880  * section.  The validity of this value has been confirmed by fmd_ckpt_open().
881  */
882 static const void *
883 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
884 {
885 	return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
886 }
887 
888 /*
889  * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
890  * index is valid, the string data is returned; otherwise 'defstr' is returned.
891  */
892 static const char *
893 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
894 {
895 	return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
896 }
897 
898 static void
899 fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
900     void (*func)(void *, fmd_event_t *), void *arg)
901 {
902 	const fcf_event_t *fcfe;
903 	const fcf_sec_t *sp;
904 	fmd_timeval_t ftv;
905 	fmd_log_t *lp, *errlp;
906 	uint_t i, n;
907 	uint32_t e_maj, e_min;
908 	uint64_t e_ino;
909 
910 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
911 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
912 		    "invalid link to section %u: expected events\n", sid);
913 	}
914 
915 	if (sp->fcfs_size == 0)
916 		return; /* empty events section or type none */
917 
918 	fcfe = fmd_ckpt_dataptr(ckp, sp);
919 	n = sp->fcfs_size / sp->fcfs_entsize;
920 
921 	/*
922 	 * Hold the reader lock on log pointers to block log rotation during
923 	 * the section restore so that we can safely insert refs to d_errlog.
924 	 */
925 	(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
926 	errlp = fmd.d_errlog;
927 
928 	e_maj = major(errlp->log_stat.st_dev);
929 	e_min = minor(errlp->log_stat.st_dev);
930 	e_ino = errlp->log_stat.st_ino;
931 
932 	for (i = 0; i < n; i++) {
933 		ftv.ftv_sec = fcfe->fcfe_todsec;
934 		ftv.ftv_nsec = fcfe->fcfe_todnsec;
935 
936 		if (e_ino == fcfe->fcfe_inode &&
937 		    e_maj == fcfe->fcfe_major &&
938 		    e_min == fcfe->fcfe_minor)
939 			lp = errlp;
940 		else
941 			lp = NULL;
942 
943 		func(arg, fmd_event_recreate(FMD_EVT_PROTOCOL,
944 		    &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0));
945 
946 		fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
947 	}
948 
949 	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
950 }
951 
952 static int
953 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
954 {
955 	const fcf_nvl_t *fcfn, *endn;
956 	const fcf_sec_t *sp;
957 	nvlist_t *nvl;
958 	int err, i;
959 
960 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
961 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
962 		    "invalid link to section %u: expected nvlists\n", sid);
963 	}
964 
965 	fcfn = fmd_ckpt_dataptr(ckp, sp);
966 	endn = fmd_ckpt_datalim(ckp, sp);
967 
968 	for (i = 0; fcfn < endn; i++) {
969 		char *data = (char *)fcfn + sp->fcfs_entsize;
970 		size_t size = (size_t)fcfn->fcfn_size;
971 
972 		if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
973 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
974 			    "size %u exceeds buffer\n", sid, i, size);
975 		}
976 
977 		if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
978 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
979 			    "unpack nvlist %u [%d]: %s\n", sid, i,
980 			    fmd_strerror(err));
981 		}
982 
983 		fmd_case_insert_suspect(cp, nvl);
984 
985 		size = sp->fcfs_entsize + fcfn->fcfn_size;
986 		size = P2ROUNDUP(size, sizeof (uint64_t));
987 		fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
988 	}
989 
990 	return (i);
991 }
992 
993 static void
994 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
995     fmd_case_t *cp, fcf_secidx_t sid)
996 {
997 	const fcf_sec_t *sp, *dsp;
998 	const fcf_buf_t *fcfb;
999 	uint_t i, n;
1000 
1001 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
1002 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1003 		    "invalid link to section %u: expected bufs\n", sid);
1004 	}
1005 
1006 	if (sp->fcfs_size == 0)
1007 		return; /* empty events section or type none */
1008 
1009 	fcfb = fmd_ckpt_dataptr(ckp, sp);
1010 	n = sp->fcfs_size / sp->fcfs_entsize;
1011 
1012 	for (i = 0; i < n; i++) {
1013 		dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1014 
1015 		if (dsp == NULL) {
1016 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1017 			    "buffer link %u\n", sid, fcfb->fcfb_data);
1018 		}
1019 
1020 		fmd_buf_write((fmd_hdl_t *)mp, cp,
1021 		    fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1022 		    ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1023 
1024 		fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1025 	}
1026 }
1027 
1028 static void
1029 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1030 {
1031 	const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1032 	const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1033 	fmd_case_t *cp;
1034 	int n;
1035 
1036 	if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
1037 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1038 		    "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1039 	}
1040 
1041 	fmd_module_lock(mp);
1042 
1043 	if ((cp = fmd_case_recreate(mp, NULL,
1044 	    FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
1045 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1046 		    "duplicate case uuid: %s\n", uuid);
1047 	}
1048 
1049 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1050 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
1051 
1052 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1053 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
1054 
1055 	n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1056 
1057 	if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1058 		fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1059 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
1060 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
1061 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
1062 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1063 
1064 	fmd_module_unlock(mp);
1065 	fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1066 }
1067 
1068 static void
1069 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1070 {
1071 	const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1072 	uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1073 	const fcf_sec_t *esp;
1074 	const char *s;
1075 
1076 	for (i = 0; i < n; i++) {
1077 		esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1078 
1079 		if (esp == NULL) {
1080 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1081 			    "invalid events link %u\n", fcfd->fcfd_events);
1082 		}
1083 
1084 		if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1085 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1086 			    "serd name %u is corrupt\n", fcfd->fcfd_name);
1087 		}
1088 
1089 		fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1090 		fmd_module_lock(mp);
1091 
1092 		fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1093 		    (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
1094 		    fmd_serd_eng_lookup(&mp->mod_serds, s));
1095 
1096 		fmd_module_unlock(mp);
1097 		fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1098 	}
1099 }
1100 
1101 static void
1102 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1103 {
1104 	const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1105 	const fcf_sec_t *sp;
1106 	uint_t i;
1107 
1108 	if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1109 	    strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1110 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1111 		    "checkpoint is not for module %s\n", mp->mod_name);
1112 	}
1113 
1114 	for (i = 0; i < ckp->ckp_secs; i++) {
1115 		sp = (void *)(ckp->ckp_buf +
1116 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1117 
1118 		switch (sp->fcfs_type) {
1119 		case FCF_SECT_CASE:
1120 			fmd_ckpt_restore_case(ckp, mp, sp);
1121 			break;
1122 		case FCF_SECT_SERD:
1123 			fmd_ckpt_restore_serd(ckp, mp, sp);
1124 			break;
1125 		}
1126 	}
1127 
1128 	fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1129 	mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1130 }
1131 
1132 /*
1133  * Restore a checkpoint for the specified module.  Any errors which occur
1134  * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1135  * either of which will automatically unlock the module and trigger an abort.
1136  */
1137 void
1138 fmd_ckpt_restore(fmd_module_t *mp)
1139 {
1140 	fmd_ckpt_t ckp;
1141 
1142 	if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1143 		return; /* never restore checkpoints for this module */
1144 
1145 	TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1146 
1147 	if (fmd_ckpt_open(&ckp, mp) == -1) {
1148 		if (errno != ENOENT)
1149 			fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1150 		TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1151 		return;
1152 	}
1153 
1154 	ASSERT(!fmd_module_locked(mp));
1155 	fmd_ckpt_restore_module(&ckp, mp);
1156 	fmd_ckpt_destroy(&ckp);
1157 	fmd_module_clrdirty(mp);
1158 
1159 	TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1160 	fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1161 }
1162 
1163 /*
1164  * Delete the module's checkpoint file.  This is used by the ckpt.zero property
1165  * code or by the fmadm reset RPC service path to force a checkpoint delete.
1166  */
1167 void
1168 fmd_ckpt_delete(fmd_module_t *mp)
1169 {
1170 	char path[PATH_MAX];
1171 
1172 	(void) snprintf(path, sizeof (path),
1173 	    "%s/%s", mp->mod_ckpt, mp->mod_name);
1174 
1175 	TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1176 
1177 	if (unlink(path) != 0 && errno != ENOENT)
1178 		fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1179 }
1180 
1181 /*
1182  * Move aside the module's checkpoint file if checkpoint restore has failed.
1183  * We rename the file rather than deleting it in the hopes that someone might
1184  * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1185  */
1186 void
1187 fmd_ckpt_rename(fmd_module_t *mp)
1188 {
1189 	char src[PATH_MAX], dst[PATH_MAX];
1190 
1191 	(void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1192 	(void) snprintf(dst, sizeof (dst), "%s-", src);
1193 
1194 	TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1195 
1196 	if (rename(src, dst) != 0 && errno != ENOENT)
1197 		fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1198 }
1199