xref: /titanic_41/usr/src/cmd/fm/fmd/common/fmd_ckpt.c (revision fa9e4066f08beec538e775443c5be79dd423fcab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/mkdev.h>
31 #include <sys/stat.h>
32 
33 #include <strings.h>
34 #include <unistd.h>
35 #include <limits.h>
36 #include <fcntl.h>
37 
38 #include <fmd_module.h>
39 #include <fmd_error.h>
40 #include <fmd_alloc.h>
41 #include <fmd_case.h>
42 #include <fmd_serd.h>
43 #include <fmd_subr.h>
44 #include <fmd_conf.h>
45 #include <fmd_event.h>
46 #include <fmd_log.h>
47 #include <fmd_api.h>
48 #include <fmd_ckpt.h>
49 
50 #include <fmd.h>
51 
/*
 * Power-of-two arithmetic helpers.  The alignment argument must be a power
 * of two.  P2ROUNDUP rounds 'x' up to the next multiple of 'align'; both
 * operands should have the same width so the negated mask covers every bit
 * of 'x'.  IS_P2ALIGNED is true when 'v' is a multiple of 'a'.
 */
#define	P2ROUNDUP(x, align)	(-((-(x)) & (-(align))))
#define	IS_P2ALIGNED(v, a)	(((uintptr_t)(v) & ((uintptr_t)(a) - 1)) == 0)
54 
55 /*
56  * The fmd_ckpt_t structure is used to manage all of the state needed by the
57  * various subroutines that save and restore checkpoints.  The structure is
58  * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
59  * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
60  */
61 typedef struct fmd_ckpt {
62 	char ckp_src[PATH_MAX];	/* ckpt input or output filename */
63 	char ckp_dst[PATH_MAX];	/* ckpt rename filename */
64 	uchar_t *ckp_buf;	/* data buffer base address */
65 	fcf_hdr_t *ckp_hdr;	/* file header pointer */
66 	uchar_t *ckp_ptr;	/* data buffer pointer */
67 	size_t ckp_size;	/* data buffer size */
68 	fcf_sec_t *ckp_secp;	/* section header table pointer */
69 	fcf_sec_t *ckp_modp;	/* section header for module */
70 	uint_t ckp_secs;	/* number of sections */
71 	char *ckp_strs;		/* string table base pointer */
72 	char *ckp_strp;		/* string table pointer */
73 	size_t ckp_strn;	/* string table size */
74 	int ckp_fd;		/* output descriptor */
75 	fmd_module_t *ckp_mp;	/* checkpoint module */
76 	void *ckp_arg;		/* private arg for callbacks */
77 } fmd_ckpt_t;
78 
/*
 * Static description of one FCF section type: the smallest legal section
 * size, the smallest legal per-entry size, and the required alignment.
 */
typedef struct fmd_ckpt_desc {
	/* minimum section size */
	uint64_t secd_size;
	/* minimum section entry size */
	uint32_t secd_entsize;
	/* section alignment */
	uint32_t secd_align;
} fmd_ckpt_desc_t;
84 
85 /*
86  * Table of FCF section descriptions.  Here we record the minimum size for each
87  * section (for use during restore) and the expected entry size and alignment
88  * for each section (for use during both checkpoint and restore).
89  */
90 static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
91 { 0, 0, sizeof (uint8_t) },					   /* NONE */
92 { 1, 0, sizeof (char) },					   /* STRTAB */
93 { sizeof (fcf_module_t), 0, sizeof (uint32_t) },		   /* MODULE */
94 { sizeof (fcf_case_t), 0, sizeof (uint32_t) },			   /* CASE */
95 { sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	   /* BUFS */
96 { 0, 0, _MAX_ALIGNMENT },					   /* BUFFER */
97 { sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
98 { sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
99 { sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	   /* NVLISTS */
100 };
101 
102 static int
103 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
104 {
105 	const char *dir = mp->mod_ckpt;
106 	const char *name = mp->mod_name;
107 	mode_t mode;
108 
109 	bzero(ckp, sizeof (fmd_ckpt_t));
110 	ckp->ckp_mp = mp;
111 
112 	ckp->ckp_size = sizeof (fcf_hdr_t);
113 	ckp->ckp_strn = 1; /* for \0 */
114 
115 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
116 	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
117 
118 	(void) unlink(ckp->ckp_src);
119 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
120 	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
121 
122 	return (ckp->ckp_fd);
123 }
124 
125 /*PRINTFLIKE2*/
126 static int
127 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
128 {
129 	va_list ap;
130 
131 	va_start(ap, format);
132 	fmd_verror(EFMD_CKPT_INVAL, format, ap);
133 	va_end(ap);
134 
135 	fmd_free(ckp->ckp_buf, ckp->ckp_size);
136 	return (fmd_set_errno(EFMD_CKPT_INVAL));
137 }
138 
139 static int
140 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
141 {
142 	struct stat64 st;
143 	uint64_t seclen;
144 	uint_t i;
145 	int err;
146 
147 	bzero(ckp, sizeof (fmd_ckpt_t));
148 	ckp->ckp_mp = mp;
149 
150 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
151 	    mp->mod_ckpt, mp->mod_name);
152 
153 	if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
154 		return (-1); /* failed to open checkpoint file */
155 
156 	if (fstat64(ckp->ckp_fd, &st) == -1) {
157 		err = errno;
158 		(void) close(ckp->ckp_fd);
159 		return (fmd_set_errno(err));
160 	}
161 
162 	ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
163 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
164 	ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
165 
166 	if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
167 	    ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
168 		err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
169 		fmd_free(ckp->ckp_buf, st.st_size);
170 		(void) close(ckp->ckp_fd);
171 		return (fmd_set_errno(err));
172 	}
173 
174 	(void) close(ckp->ckp_fd);
175 	ckp->ckp_fd = -1;
176 
177 	/*
178 	 * Once we've read in a consistent copy of the FCF file and we're sure
179 	 * the header can be accessed, go through it and make sure everything
180 	 * is valid.  We also check that unused bits are zero so we can expand
181 	 * to use them safely in the future and support old files if needed.
182 	 */
183 	if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
184 	    FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
185 		return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
186 
187 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
188 		return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
189 
190 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
191 		return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
192 
193 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
194 		return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
195 		    ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
196 	}
197 
198 	for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
199 		if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
200 			return (fmd_ckpt_inval(ckp,
201 			    "bad checkpoint padding at id[%d]", i));
202 		}
203 	}
204 
205 	if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
206 		return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
207 
208 	if (ckp->ckp_hdr->fcfh_pad != 0)
209 		return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
210 
211 	if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
212 	    ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
213 		return (fmd_ckpt_inval(ckp,
214 		    "bad header and/or section size\n"));
215 	}
216 
217 	seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
218 	    (uint64_t)ckp->ckp_hdr->fcfh_secsize;
219 
220 	if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
221 	    seclen > ckp->ckp_size ||
222 	    ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
223 	    ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
224 		return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
225 
226 	if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
227 	    !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
228 		return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
229 
230 	/*
231 	 * Once the header is validated, iterate over the section headers
232 	 * ensuring that each one is valid w.r.t. offset, alignment, and size.
233 	 * We also pick up the string table pointer during this pass.
234 	 */
235 	ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
236 	ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
237 
238 	for (i = 0; i < ckp->ckp_secs; i++) {
239 		fcf_sec_t *sp = (void *)(ckp->ckp_buf +
240 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
241 
242 		const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
243 
244 		if (sp->fcfs_flags != 0) {
245 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
246 			    "section flags (0x%x)\n", i, sp->fcfs_flags));
247 		}
248 
249 		if (sp->fcfs_align & (sp->fcfs_align - 1)) {
250 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
251 			    "alignment (%u)\n", i, sp->fcfs_align));
252 		}
253 
254 		if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
255 			return (fmd_ckpt_inval(ckp, "section %u is not properly"
256 			    " aligned (offset %llu)\n", i, sp->fcfs_offset));
257 		}
258 
259 		if (sp->fcfs_entsize != 0 &&
260 		    (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
261 			return (fmd_ckpt_inval(ckp, "section %u has misaligned "
262 			    "entsize %u\n", i, sp->fcfs_entsize));
263 		}
264 
265 		if (sp->fcfs_offset > ckp->ckp_size ||
266 		    sp->fcfs_size > ckp->ckp_size ||
267 		    sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
268 		    sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
269 			return (fmd_ckpt_inval(ckp, "section %u has corrupt "
270 			    "size or offset\n", i));
271 		}
272 
273 		if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
274 		    sizeof (_fmd_ckpt_sections[0])) {
275 			return (fmd_ckpt_inval(ckp, "section %u has unknown "
276 			    "section type %u\n", i, sp->fcfs_type));
277 		}
278 
279 		if (sp->fcfs_align != dp->secd_align) {
280 			return (fmd_ckpt_inval(ckp, "section %u has align %u "
281 			    "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
282 		}
283 
284 		if (sp->fcfs_size < dp->secd_size ||
285 		    sp->fcfs_entsize < dp->secd_entsize) {
286 			return (fmd_ckpt_inval(ckp, "section %u has short "
287 			    "size or entsize\n", i));
288 		}
289 
290 		switch (sp->fcfs_type) {
291 		case FCF_SECT_STRTAB:
292 			if (ckp->ckp_strs != NULL) {
293 				return (fmd_ckpt_inval(ckp, "multiple string "
294 				    "tables are present in checkpoint file\n"));
295 			}
296 
297 			ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
298 			ckp->ckp_strn = sp->fcfs_size;
299 
300 			if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
301 				return (fmd_ckpt_inval(ckp, "string table %u "
302 				    "is missing terminating nul byte\n", i));
303 			}
304 			break;
305 
306 		case FCF_SECT_MODULE:
307 			if (ckp->ckp_modp != NULL) {
308 				return (fmd_ckpt_inval(ckp, "multiple module "
309 				    "sects are present in checkpoint file\n"));
310 			}
311 			ckp->ckp_modp = sp;
312 			break;
313 		}
314 	}
315 
316 	/*
317 	 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
318 	 * This is done to ensure that links can use index 0 as a null section.
319 	 */
320 	if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
321 	    ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
322 		return (fmd_ckpt_inval(ckp, "section 0 is not of the "
323 		    "appropriate size and/or attributes (SECT_NONE)\n"));
324 	}
325 
326 	if (ckp->ckp_modp == NULL) {
327 		return (fmd_ckpt_inval(ckp,
328 		    "no module section found in file\n"));
329 	}
330 
331 	return (0);
332 }
333 
334 static void
335 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
336 {
337 	if (ckp->ckp_buf != NULL)
338 		fmd_free(ckp->ckp_buf, ckp->ckp_size);
339 	if (ckp->ckp_fd >= 0)
340 		(void) close(ckp->ckp_fd);
341 }
342 
343 /*
344  * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
345  * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
346  * then aborts the API call and the surrounding module entry point by doing an
347  * fmd_module_abort(), which longjmps to the place where we entered the module.
348  * Depending on the type of error and conf settings, we will reset or fail.
349  */
350 /*PRINTFLIKE3*/
351 static void
352 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
353 {
354 	fmd_module_t *mp = ckp->ckp_mp;
355 	va_list ap;
356 
357 	va_start(ap, format);
358 	fmd_verror(err, format, ap);
359 	va_end(ap);
360 
361 	if (fmd_module_locked(mp))
362 		fmd_module_unlock(mp);
363 
364 	fmd_ckpt_destroy(ckp);
365 	fmd_module_abort(mp, err);
366 }
367 
368 static fcf_secidx_t
369 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
370 {
371 	const fmd_ckpt_desc_t *dp;
372 
373 	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
374 	dp = &_fmd_ckpt_sections[type];
375 
376 	ckp->ckp_ptr = (uchar_t *)
377 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
378 
379 	ckp->ckp_secp->fcfs_type = type;
380 	ckp->ckp_secp->fcfs_align = dp->secd_align;
381 	ckp->ckp_secp->fcfs_flags = 0;
382 	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
383 	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
384 	ckp->ckp_secp->fcfs_size = size;
385 
386 	/*
387 	 * If the data pointer is non-NULL, copy the data to our buffer; else
388 	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
389 	 */
390 	if (data != NULL) {
391 		bcopy(data, ckp->ckp_ptr, size);
392 		ckp->ckp_ptr += size;
393 	}
394 
395 	ckp->ckp_secp++;
396 	return (ckp->ckp_secs++);
397 }
398 
399 static fcf_stridx_t
400 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
401 {
402 	fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
403 
404 	(void) strcpy(ckp->ckp_strp, s);
405 	ckp->ckp_strp += strlen(s) + 1;
406 
407 	return (idx);
408 }
409 
410 static int
411 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
412 {
413 	/*
414 	 * We've added up all the sections by now: add two more for SECT_NONE
415 	 * and SECT_STRTAB, and add the size of the section header table and
416 	 * string table to the total size.  We know that the fcf_hdr_t is
417 	 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
418 	 * is aligned so that any section can follow it, so no extra padding
419 	 * bytes need to be allocated between any of these items.
420 	 */
421 	ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
422 	ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
423 	ckp->ckp_size += ckp->ckp_strn;
424 
425 	TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
426 	ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
427 
428 	if (ckp->ckp_buf == NULL)
429 		return (-1); /* errno is set for us */
430 
431 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
432 
433 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
434 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
435 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
436 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
437 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
438 	ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
439 	ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
440 
441 	ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
442 	ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
443 	ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
444 	ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
445 	ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
446 	ckp->ckp_hdr->fcfh_cgen = gen;
447 
448 	ckp->ckp_secs = 0; /* reset section counter for second pass */
449 	ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
450 	ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
451 	ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
452 	ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
453 
454 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
455 	return (0);
456 }
457 
458 static int
459 fmd_ckpt_commit(fmd_ckpt_t *ckp)
460 {
461 	fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
462 	size_t stroff = ckp->ckp_size - ckp->ckp_strn;
463 
464 	/*
465 	 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
466 	 * and current pointer locations all add up appropriately.  Any ASSERTs
467 	 * which trip here likely indicate an inconsistency in the code for the
468 	 * reservation pass and the buffer update pass of the FCF subroutines.
469 	 */
470 	ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
471 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
472 	ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
473 
474 	ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
475 	ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
476 	ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
477 
478 	if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
479 	    fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
480 		return (-1); /* errno is set for us */
481 
482 	ckp->ckp_fd = -1; /* fd is now closed */
483 	return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
484 }
485 
486 static void
487 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
488 {
489 	if (size != 0) {
490 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
491 		ckp->ckp_secs++;
492 	}
493 }
494 
495 static void
496 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
497 {
498 	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
499 	ckp->ckp_strn += strlen(bp->buf_name) + 1;
500 	ckp->ckp_secs++;
501 }
502 
503 static void
504 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
505 {
506 	fcf_buf_t *fcfb = ckp->ckp_arg;
507 
508 	fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
509 	fcfb->fcfb_data = fmd_ckpt_section(ckp,
510 	    bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
511 
512 	ckp->ckp_arg = fcfb + 1;
513 }
514 
/*
 * Write one fcf_event_t record at the current data pointer and advance past
 * it.  The record holds the event time, the device and inode of the log the
 * event refers to (all-ones values if the event has no log), and the event's
 * offset.
 */
static void
fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
{
	fmd_event_impl_t *evp = (fmd_event_impl_t *)e;
	fcf_event_t *rec = (void *)ckp->ckp_ptr;
	fmd_log_t *logp = evp->ev_log;

	rec->fcfe_todsec = evp->ev_time.ftv_sec;
	rec->fcfe_todnsec = evp->ev_time.ftv_nsec;
	rec->fcfe_major = logp != NULL ? major(logp->log_stat.st_dev) : -1U;
	rec->fcfe_minor = logp != NULL ? minor(logp->log_stat.st_dev) : -1U;
	rec->fcfe_inode = logp != NULL ? logp->log_stat.st_ino : -1ULL;
	rec->fcfe_offset = evp->ev_off;

	ckp->ckp_ptr += sizeof (fcf_event_t);
}
531 
532 static void
533 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
534 {
535 	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
536 	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
537 	size_t nvsize = 0;
538 
539 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
540 	fcfn->fcfn_size = (uint64_t)nvsize;
541 
542 	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
543 	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
544 
545 	ckp->ckp_ptr = (uchar_t *)
546 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
547 }
548 
549 static void
550 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
551 {
552 	fmd_ckpt_resv(ckp,
553 	    sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
554 
555 	ckp->ckp_strn += strlen(sgp->sg_name) + 1;
556 }
557 
558 static void
559 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
560 {
561 	fcf_serd_t *fcfd = ckp->ckp_arg;
562 	fcf_secidx_t evsec = FCF_SECT_NONE;
563 	fmd_serd_elem_t *sep;
564 
565 	if (sgp->sg_count != 0) {
566 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
567 		    sizeof (fcf_event_t) * sgp->sg_count);
568 
569 		for (sep = fmd_list_next(&sgp->sg_list);
570 		    sep != NULL; sep = fmd_list_next(sep))
571 			fmd_ckpt_save_event(ckp, sep->se_event);
572 	}
573 
574 	fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
575 	fcfd->fcfd_events = evsec;
576 	fcfd->fcfd_pad = 0;
577 	fcfd->fcfd_n = sgp->sg_n;
578 	fcfd->fcfd_t = sgp->sg_t;
579 
580 	ckp->ckp_arg = fcfd + 1;
581 }
582 
583 static void
584 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
585 {
586 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
587 	fmd_case_susp_t *cis;
588 	uint_t n;
589 
590 	n = fmd_buf_hash_count(&cip->ci_bufs);
591 	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
592 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
593 
594 	if (cip->ci_principal != NULL)
595 		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
596 
597 	fmd_ckpt_resv(ckp,
598 	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
599 
600 	if (cip->ci_nsuspects != 0)
601 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
602 
603 	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
604 
605 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
606 		size_t nvsize = 0;
607 
608 		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
609 		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
610 		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
611 	}
612 
613 	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
614 	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
615 	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
616 }
617 
618 static void
619 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
620 {
621 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
622 
623 	fmd_case_item_t *cit;
624 	fmd_case_susp_t *cis;
625 	fcf_case_t fcfc;
626 	uint_t n;
627 
628 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
629 	fcf_secidx_t evsec = FCF_SECIDX_NONE;
630 	fcf_secidx_t nvsec = FCF_SECIDX_NONE;
631 	fcf_secidx_t prsec = FCF_SECIDX_NONE;
632 
633 	if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
634 		size_t size = sizeof (fcf_buf_t) * n;
635 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
636 
637 		fmd_buf_hash_apply(&cip->ci_bufs,
638 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
639 
640 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
641 		fmd_free(bufs, size);
642 	}
643 
644 	if (cip->ci_principal != NULL) {
645 		prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
646 		    sizeof (fcf_event_t));
647 
648 		fmd_ckpt_save_event(ckp, cip->ci_principal);
649 	}
650 
651 	if (cip->ci_nitems != 0) {
652 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
653 		    sizeof (fcf_event_t) * cip->ci_nitems);
654 
655 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
656 			fmd_ckpt_save_event(ckp, cit->cit_event);
657 	}
658 
659 	if (cip->ci_nsuspects != 0) {
660 		nvsec = fmd_ckpt_section(ckp, NULL,
661 		    FCF_SECT_NVLISTS, cip->ci_nvsz);
662 
663 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
664 			fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
665 	}
666 
667 	fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
668 	fcfc.fcfc_bufs = bufsec;
669 	fcfc.fcfc_principal = prsec;
670 	fcfc.fcfc_events = evsec;
671 	fcfc.fcfc_suspects = nvsec;
672 
673 	switch (cip->ci_state) {
674 	case FMD_CASE_UNSOLVED:
675 		fcfc.fcfc_state = FCF_CASE_UNSOLVED;
676 		break;
677 	case FMD_CASE_SOLVED:
678 		fcfc.fcfc_state = FCF_CASE_SOLVED;
679 		break;
680 	case FMD_CASE_CLOSED:
681 		fcfc.fcfc_state = FCF_CASE_CLOSED;
682 		break;
683 	default:
684 		fmd_panic("case %p (%s) has invalid state %u",
685 		    (void *)cp, cip->ci_uuid, cip->ci_state);
686 	}
687 
688 	(void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
689 }
690 
691 static void
692 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
693 {
694 	fmd_case_t *cp;
695 	uint_t n;
696 
697 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
698 		fmd_ckpt_resv_case(ckp, cp);
699 
700 	n = fmd_serd_hash_count(&mp->mod_serds);
701 	fmd_serd_hash_apply(&mp->mod_serds,
702 	    (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
703 	fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
704 
705 	n = fmd_buf_hash_count(&mp->mod_bufs);
706 	fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
707 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
708 
709 	fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
710 	ckp->ckp_strn += strlen(mp->mod_name) + 1;
711 	ckp->ckp_strn += strlen(mp->mod_path) + 1;
712 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
713 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
714 }
715 
716 static void
717 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
718 {
719 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
720 	fcf_module_t fcfm;
721 	fmd_case_t *cp;
722 	uint_t n;
723 
724 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
725 		fmd_ckpt_save_case(ckp, cp);
726 
727 	if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
728 		size_t size = sizeof (fcf_serd_t) * n;
729 		fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
730 
731 		fmd_serd_hash_apply(&mp->mod_serds,
732 		    (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
733 
734 		(void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
735 		fmd_free(serds, size);
736 	}
737 
738 	if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
739 		size_t size = sizeof (fcf_buf_t) * n;
740 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
741 
742 		fmd_buf_hash_apply(&mp->mod_bufs,
743 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
744 
745 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
746 		fmd_free(bufs, size);
747 	}
748 
749 	fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
750 	fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
751 	fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
752 	fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
753 	fcfm.fcfm_bufs = bufsec;
754 
755 	(void) fmd_ckpt_section(ckp, &fcfm,
756 	    FCF_SECT_MODULE, sizeof (fcf_module_t));
757 }
758 
759 void
760 fmd_ckpt_save(fmd_module_t *mp)
761 {
762 	struct stat64 st;
763 	char path[PATH_MAX];
764 	mode_t dirmode;
765 
766 	hrtime_t now = gethrtime();
767 	fmd_ckpt_t ckp;
768 	int err;
769 
770 	ASSERT(fmd_module_locked(mp));
771 
772 	/*
773 	 * If checkpointing is disabled for the module, just return.  We must
774 	 * commit the module state anyway to transition pending log events.
775 	 */
776 	if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
777 		fmd_module_commit(mp);
778 		return;
779 	}
780 
781 	if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
782 		return; /* no checkpoint is necessary for this module */
783 
784 	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
785 	    mp->mod_name, mp->mod_gen + 1));
786 
787 	/*
788 	 * If the per-module checkpoint directory isn't found or isn't of type
789 	 * directory, move aside whatever is there (if anything) and attempt
790 	 * to mkdir(2) a new module checkpoint directory.  If this fails, we
791 	 * have no choice but to abort the checkpoint and try again later.
792 	 */
793 	if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
794 		(void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
795 		(void) rename(mp->mod_ckpt, path);
796 		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
797 
798 		if (mkdir(mp->mod_ckpt, dirmode) != 0) {
799 			fmd_error(EFMD_CKPT_MKDIR,
800 			    "failed to mkdir %s", mp->mod_ckpt);
801 			return; /* return without clearing dirty bits */
802 		}
803 	}
804 
805 	/*
806 	 * Create a temporary file to write out the checkpoint into, and create
807 	 * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
808 	 * then figure out how much space will be required, and allocate it.
809 	 */
810 	if (fmd_ckpt_create(&ckp, mp) == -1) {
811 		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
812 		return;
813 	}
814 
815 	fmd_ckpt_resv_module(&ckp, mp);
816 
817 	if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
818 		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
819 		fmd_ckpt_destroy(&ckp);
820 		return;
821 	}
822 
823 	/*
824 	 * Fill in the checkpoint content, write it to disk, sync it, and then
825 	 * atomically rename it to the destination path.  If this fails, we
826 	 * have no choice but to leave all our dirty bits set and return.
827 	 */
828 	fmd_ckpt_save_module(&ckp, mp);
829 	err = fmd_ckpt_commit(&ckp);
830 	fmd_ckpt_destroy(&ckp);
831 
832 	if (err != 0) {
833 		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
834 		return; /* return without clearing dirty bits */
835 	}
836 
837 	fmd_module_commit(mp);
838 	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
839 
840 	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
841 	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
842 
843 	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
844 	    mp->mod_name, mp->mod_gen);
845 }
846 
847 /*
848  * Utility function to retrieve a pointer to a section's header and verify that
849  * it is of the expected type or it is a FCF_SECT_NONE reference.
850  */
851 static const fcf_sec_t *
852 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
853 {
854 	const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
855 	    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
856 
857 	return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
858 	    sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
859 }
860 
861 /*
862  * Utility function to retrieve the data pointer for a particular section.  The
863  * validity of the header values has already been checked by fmd_ckpt_open().
864  */
865 static const void *
866 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
867 {
868 	return (ckp->ckp_buf + sp->fcfs_offset);
869 }
870 
871 /*
872  * Utility function to retrieve the end of the data region for a particular
873  * section.  The validity of this value has been confirmed by fmd_ckpt_open().
874  */
875 static const void *
876 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
877 {
878 	return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
879 }
880 
881 /*
882  * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
883  * index is valid, the string data is returned; otherwise 'defstr' is returned.
884  */
885 static const char *
886 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
887 {
888 	return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
889 }
890 
/*
 * Recreate every event recorded in events section 'sid' and pass each one
 * to 'func' along with 'arg'.  An event whose saved device and inode match
 * the current error log is recreated with a reference to that log; all
 * others are recreated without a log.  A link to a section of the wrong
 * type aborts the restore through fmd_ckpt_error().
 *
 * A link to a section of type FCF_SECT_NONE is treated as an empty list
 * regardless of the size recorded in its header.  fmd_ckpt_open() only
 * requires section 0 to be empty, and NONE sections may have a zero entry
 * size, so dividing by the entry size here could otherwise fault on a
 * corrupt file.
 */
static void
fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
    void (*func)(void *, fmd_event_t *), void *arg)
{
	const fcf_event_t *fcfe;
	const fcf_sec_t *sp;
	fmd_timeval_t ftv;
	fmd_log_t *lp, *errlp;
	uint_t i, n;
	uint32_t e_maj, e_min;
	uint64_t e_ino;

	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
		    "invalid link to section %u: expected events\n", sid);
	}

	if (sp->fcfs_type == FCF_SECT_NONE ||
	    sp->fcfs_size == 0 || sp->fcfs_entsize == 0)
		return; /* empty events section or type none */

	fcfe = fmd_ckpt_dataptr(ckp, sp);
	n = sp->fcfs_size / sp->fcfs_entsize;

	/*
	 * Hold the reader lock on log pointers to block log rotation during
	 * the section restore so that we can safely insert refs to d_errlog.
	 */
	(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
	errlp = fmd.d_errlog;

	e_maj = major(errlp->log_stat.st_dev);
	e_min = minor(errlp->log_stat.st_dev);
	e_ino = errlp->log_stat.st_ino;

	for (i = 0; i < n; i++) {
		ftv.ftv_sec = fcfe->fcfe_todsec;
		ftv.ftv_nsec = fcfe->fcfe_todnsec;

		if (e_ino == fcfe->fcfe_inode &&
		    e_maj == fcfe->fcfe_major &&
		    e_min == fcfe->fcfe_minor)
			lp = errlp;
		else
			lp = NULL;

		func(arg, fmd_event_recreate(FMD_EVT_PROTOCOL,
		    &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0));

		/* step by the file's entry size, which may exceed ours */
		fcfe = (const fcf_event_t *)
		    ((uintptr_t)fcfe + sp->fcfs_entsize);
	}

	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
}
944 
945 static void
946 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
947 {
948 	const fcf_nvl_t *fcfn, *endn;
949 	const fcf_sec_t *sp;
950 	nvlist_t *nvl;
951 	int err, i;
952 
953 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
954 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
955 		    "invalid link to section %u: expected nvlists\n", sid);
956 	}
957 
958 	fcfn = fmd_ckpt_dataptr(ckp, sp);
959 	endn = fmd_ckpt_datalim(ckp, sp);
960 
961 	for (i = 0; fcfn < endn; i++) {
962 		char *data = (char *)fcfn + sp->fcfs_entsize;
963 		size_t size = (size_t)fcfn->fcfn_size;
964 
965 		if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
966 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
967 			    "size %u exceeds buffer\n", sid, i, size);
968 		}
969 
970 		if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
971 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
972 			    "unpack nvlist %u [%d]: %s\n", sid, i,
973 			    fmd_strerror(err));
974 		}
975 
976 		fmd_case_insert_suspect(cp, nvl);
977 
978 		size = sp->fcfs_entsize + fcfn->fcfn_size;
979 		size = P2ROUNDUP(size, sizeof (uint64_t));
980 		fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
981 	}
982 }
983 
984 static void
985 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
986     fmd_case_t *cp, fcf_secidx_t sid)
987 {
988 	const fcf_sec_t *sp, *dsp;
989 	const fcf_buf_t *fcfb;
990 	uint_t i, n;
991 
992 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
993 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
994 		    "invalid link to section %u: expected bufs\n", sid);
995 	}
996 
997 	if (sp->fcfs_size == 0)
998 		return; /* empty events section or type none */
999 
1000 	fcfb = fmd_ckpt_dataptr(ckp, sp);
1001 	n = sp->fcfs_size / sp->fcfs_entsize;
1002 
1003 	for (i = 0; i < n; i++) {
1004 		dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1005 
1006 		if (dsp == NULL) {
1007 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1008 			    "buffer link %u\n", sid, fcfb->fcfb_data);
1009 		}
1010 
1011 		fmd_buf_write((fmd_hdl_t *)mp, cp,
1012 		    fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1013 		    ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1014 
1015 		fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1016 	}
1017 }
1018 
1019 static void
1020 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1021 {
1022 	const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1023 	const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1024 	fmd_case_t *cp;
1025 
1026 	if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSED) {
1027 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1028 		    "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1029 	}
1030 
1031 	fmd_module_lock(mp);
1032 
1033 	if ((cp = fmd_case_recreate(mp, uuid)) == NULL) {
1034 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1035 		    "duplicate case uuid: %s\n", uuid);
1036 	}
1037 
1038 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1039 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
1040 
1041 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1042 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
1043 
1044 	fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1045 
1046 	if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1047 		fmd_case_transition(cp, FMD_CASE_SOLVED);
1048 	else if (fcfc->fcfc_state == FMD_CASE_CLOSED)
1049 		fmd_case_transition(cp, FMD_CASE_CLOSED);
1050 
1051 	fmd_module_unlock(mp);
1052 	fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1053 }
1054 
1055 static void
1056 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1057 {
1058 	const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1059 	uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1060 	const fcf_sec_t *esp;
1061 	const char *s;
1062 
1063 	for (i = 0; i < n; i++) {
1064 		esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1065 
1066 		if (esp == NULL) {
1067 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1068 			    "invalid events link %u\n", fcfd->fcfd_events);
1069 		}
1070 
1071 		if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1072 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1073 			    "serd name %u is corrupt\n", fcfd->fcfd_name);
1074 		}
1075 
1076 		fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1077 		fmd_module_lock(mp);
1078 
1079 		fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1080 		    (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
1081 		    fmd_serd_eng_lookup(&mp->mod_serds, s));
1082 
1083 		fmd_module_unlock(mp);
1084 		fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1085 	}
1086 }
1087 
1088 static void
1089 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1090 {
1091 	const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1092 	const fcf_sec_t *sp;
1093 	uint_t i;
1094 
1095 	if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1096 	    strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1097 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1098 		    "checkpoint is not for module %s\n", mp->mod_name);
1099 	}
1100 
1101 	for (i = 0; i < ckp->ckp_secs; i++) {
1102 		sp = (void *)(ckp->ckp_buf +
1103 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1104 
1105 		switch (sp->fcfs_type) {
1106 		case FCF_SECT_CASE:
1107 			fmd_ckpt_restore_case(ckp, mp, sp);
1108 			break;
1109 		case FCF_SECT_SERD:
1110 			fmd_ckpt_restore_serd(ckp, mp, sp);
1111 			break;
1112 		}
1113 	}
1114 
1115 	fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1116 	mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1117 }
1118 
1119 /*
1120  * Restore a checkpoint for the specified module.  Any errors which occur
1121  * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1122  * either of which will automatically unlock the module and trigger an abort.
1123  */
1124 void
1125 fmd_ckpt_restore(fmd_module_t *mp)
1126 {
1127 	fmd_ckpt_t ckp;
1128 
1129 	if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1130 		return; /* never restore checkpoints for this module */
1131 
1132 	TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1133 
1134 	if (fmd_ckpt_open(&ckp, mp) == -1) {
1135 		if (errno != ENOENT)
1136 			fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1137 		TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1138 		return;
1139 	}
1140 
1141 	ASSERT(!fmd_module_locked(mp));
1142 	fmd_ckpt_restore_module(&ckp, mp);
1143 	fmd_ckpt_destroy(&ckp);
1144 	fmd_module_clrdirty(mp);
1145 
1146 	TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1147 	fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1148 }
1149 
1150 /*
1151  * Delete the module's checkpoint file.  This is used by the ckpt.zero property
1152  * code or by the fmadm reset RPC service path to force a checkpoint delete.
1153  */
1154 void
1155 fmd_ckpt_delete(fmd_module_t *mp)
1156 {
1157 	char path[PATH_MAX];
1158 
1159 	(void) snprintf(path, sizeof (path),
1160 	    "%s/%s", mp->mod_ckpt, mp->mod_name);
1161 
1162 	TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1163 
1164 	if (unlink(path) != 0 && errno != ENOENT)
1165 		fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1166 }
1167 
1168 /*
1169  * Move aside the module's checkpoint file if checkpoint restore has failed.
1170  * We rename the file rather than deleting it in the hopes that someone might
1171  * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1172  */
1173 void
1174 fmd_ckpt_rename(fmd_module_t *mp)
1175 {
1176 	char src[PATH_MAX], dst[PATH_MAX];
1177 
1178 	(void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1179 	(void) snprintf(dst, sizeof (dst), "%s-", src);
1180 
1181 	TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1182 
1183 	if (rename(src, dst) != 0 && errno != ENOENT)
1184 		fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1185 }
1186