xref: /titanic_44/usr/src/uts/sun4u/os/cpr_impl.c (revision d1a180b0452ce86577a43be3245d2eacdeec1a34)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Platform specific implementation code
31  */
32 
33 #define	SUNDDI_IMPL
34 
35 #include <sys/types.h>
36 #include <sys/promif.h>
37 #include <sys/prom_isa.h>
38 #include <sys/prom_plat.h>
39 #include <sys/mmu.h>
40 #include <vm/hat_sfmmu.h>
41 #include <sys/iommu.h>
42 #include <sys/scb.h>
43 #include <sys/cpuvar.h>
44 #include <sys/intreg.h>
45 #include <sys/pte.h>
46 #include <vm/hat.h>
47 #include <vm/page.h>
48 #include <vm/as.h>
49 #include <sys/cpr.h>
50 #include <sys/kmem.h>
51 #include <sys/clock.h>
52 #include <sys/kmem.h>
53 #include <sys/panic.h>
54 #include <vm/seg_kmem.h>
55 #include <sys/cpu_module.h>
56 #include <sys/callb.h>
57 #include <sys/machsystm.h>
58 #include <sys/vmsystm.h>
59 #include <sys/systm.h>
60 #include <sys/archsystm.h>
61 #include <sys/stack.h>
62 #include <sys/fs/ufs_fs.h>
63 #include <sys/memlist.h>
64 #include <sys/bootconf.h>
65 #include <sys/thread.h>
66 #include <vm/vm_dep.h>
67 
/* tlb write primitives and bitmap reset, defined outside this file */
extern	void cpr_clear_bitmaps(void);
extern	void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
extern	void itlb_wr_entry(uint_t, tte_t *, uint64_t *);

/* forward declarations for file-local helpers */
static	int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
static	void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
static	caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
static	int cpr_dump_sensitive(vnode_t *, csd_t *);
static	void i_cpr_clear_entries(uint64_t, uint64_t);
static	void i_cpr_xcall(xcfunc_t);

/* releases the descriptor and data storage areas */
void	i_cpr_storage_free(void);

/* data objects defined outside this file */
extern void *i_cpr_data_page;
extern int cpr_test_mode;
extern int cpr_nbitmaps;
extern char cpr_default_path[];
extern caddr_t textva, datava;
86 
/* prom-retained ranges; the CPR_PANICBUF slot is filled in on tagging */
static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
/* base of the mapping area obtained by i_cpr_map_setup() */
caddr_t cpr_vaddr = NULL;

/*
 * running totals for the current save-to-storage pass; both are
 * reset by i_cpr_save_to_storage() and advanced by
 * i_cpr_compress_and_save()
 */
static	uint_t sensitive_pages_saved;
static	uint_t sensitive_size_saved;

/* bounds of the storage data area and of the descriptor array */
caddr_t	i_cpr_storage_data_base;
caddr_t	i_cpr_storage_data_end;
csd_t *i_cpr_storage_desc_base;
csd_t *i_cpr_storage_desc_end;		/* one byte beyond last used descp */
csd_t *i_cpr_storage_desc_last_used;	/* last used descriptor */
caddr_t sensitive_write_ptr;		/* position for next storage write */

size_t	i_cpr_sensitive_bytes_dumped;
pgcnt_t	i_cpr_sensitive_pgs_dumped;
pgcnt_t	i_cpr_storage_data_sz;		/* in pages */
pgcnt_t	i_cpr_storage_desc_pgcnt;	/* in pages */

ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
/* machine dependent info written to the statefile by i_cpr_write_machdep() */
static	csu_md_t m_info;
107 
108 
/*
 * storage allocation tuning; MAX_STORAGE_ALLOC_RETRY bounds the retry
 * loop in i_cpr_save_sensitive_kpages() and INITIAL_ALLOC_PCNT/INTEGRAL
 * drive the size estimate in i_cpr_storage_data_alloc().
 * NOTE(review): MAX_STORAGE_RETRY, EXTRA_RATE, EXTRA_DESCS and the
 * CPR_NO_STORAGE_* codes are presumably consumed further down the file;
 * confirm before changing them.
 */
#define	MAX_STORAGE_RETRY	3
#define	MAX_STORAGE_ALLOC_RETRY	3
#define	INITIAL_ALLOC_PCNT	40	/* starting allocation percentage */
#define	INTEGRAL		100	/* to get 1% precision */

#define	EXTRA_RATE		2	/* add EXTRA_RATE% extra space */
#define	EXTRA_DESCS		10

#define	CPR_NO_STORAGE_DESC	1
#define	CPR_NO_STORAGE_DATA	2

/* actions accepted by i_cpr_cif_setup() */
#define	CIF_SPLICE		0
#define	CIF_UNLINK		1
123 
/*
 * CPR miscellaneous support routines
 *
 * cpr_open() is vn_open() on a kernel-space path with create mode 0600
 * and the CRCREAT flag; cpr_rdwr() is vn_rdwr() on a kernel-space buffer
 * at file offset 0 using CRED(), with no residual count returned.
 */
#define	cpr_open(path, mode,  vpp)	(vn_open(path, UIO_SYSSPACE, \
		mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp,  (caddr_t)(basep), \
		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
		(ssize_t *)NULL))
132 
/*
 * definitions for saving/restoring prom pages
 * NOTE(review): these four are presumably managed by i_cpr_prom_pages(),
 * which is outside the visible part of the file; confirm.
 */
static void	*ppage_buf;
static pgcnt_t	ppage_count;
static pfn_t	*pphys_list;
static size_t	pphys_list_size;

/*
 * tlb_rw_t: read or write one tlb entry (index, tte, va tag)
 * tlb_filter_t: per-entry callback invoked by i_cpr_scan_tlb()
 */
typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
143 
/*
 * private struct for tlb handling
 *
 * dst/tail bound the sutlb_t array being filled: each recorded entry is
 * written at dst, which is then advanced and must stay below tail.
 * reader/writer access a single tlb entry and filter is the callback
 * applied by i_cpr_scan_tlb().  index is the starting entry for a scan
 * and, for i_cpr_lnb()/i_cpr_make_tte(), the next target tlb index
 * (counting down); indices whose bit is set in skip are passed over.
 */
struct cpr_trans_info {
	sutlb_t		*dst;
	sutlb_t		*tail;
	tlb_rw_t	reader;
	tlb_rw_t	writer;
	tlb_filter_t	filter;
	int		index;
	uint64_t	skip;		/* assumes TLB <= 64 locked entries */
};
typedef struct cpr_trans_info cti_t;
157 
158 
/*
 * special handling for tlb info
 *
 * WITHIN_OFW: va lies strictly between OFW_START_ADDR and OFW_END_ADDR
 * WITHIN_NUCLEUS: the page starting at va fits inside the 4M range
 *	that begins at base
 * IS_BIGKTSB: bigktsb is enabled and va falls inside
 *	[ktsb_base, ktsb_base + ktsb_sz)
 */
#define	WITHIN_OFW(va) \
	(((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))

#define	WITHIN_NUCLEUS(va, base) \
	(((va) >= (base)) && \
	(((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))

#define	IS_BIGKTSB(va) \
	(enable_bigktsb && \
	((va) >= (uint64_t)ktsb_base) && \
	((va) < (uint64_t)(ktsb_base + ktsb_sz)))
173 
174 
175 /*
176  * WARNING:
177  * the text from this file is linked to follow cpr_resume_setup.o;
178  * only add text between here and i_cpr_end_jumpback when it needs
179  * to be called during resume before we switch back to the kernel
180  * trap table.  all the text in this range must fit within a page.
181  */
182 
183 
/*
 * each time a machine is reset, the prom uses an inconsistent set of phys
 * pages and the cif cookie may differ as well.  so prior to restoring the
 * original prom, we have to use the new/tmp prom's translations
 * when requesting prom services.
 *
 * cif_handler starts out as the original prom cookie, and that gets used
 * by client_handler() to jump into the prom.  here we splice-in a wrapper
 * routine by writing cif_handler; client_handler() will now jump to the
 * wrapper which switches the %tba to the new/tmp prom's trap table then
 * jumps to the new cookie.
 *
 * action is CIF_SPLICE to install the wrapper or CIF_UNLINK to put the
 * saved cookie back; any other value leaves cif_handler untouched.
 */
void
i_cpr_cif_setup(int action)
{
	extern void *i_cpr_orig_cif, *cif_handler;
	extern int i_cpr_cif_wrapper(void *);

	/*
	 * save the original cookie and change the current cookie to the
	 * wrapper routine.  later we just restore the original cookie.
	 */
	if (action == CIF_SPLICE) {
		i_cpr_orig_cif = cif_handler;
		cif_handler = (void *)i_cpr_cif_wrapper;
	} else if (action == CIF_UNLINK)
		cif_handler = i_cpr_orig_cif;
}
212 
213 
/*
 * launch slave cpus into kernel text, pause them,
 * and restore the original prom pages
 *
 * the sequence is: zero kcontextreg, rebuild cpu_ready_set with only
 * this cpu, splice in the cif wrapper, switch to the kernel trap table,
 * restart and pause the other cpus (MP only), clear the temporary tlb
 * entries, unlink the cif wrapper, restore the prom pages and finally
 * put kcontextreg back.
 */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	ihandle_t tmpout = 0;
	char *str;
	cpu_t *cp;

	/* saved so it can be reinstated once the prom pages are restored */
	uint64_t kctx = kcontextreg;

	/*
	 * Do not allow setting page size codes in MMU primary context
	 * register while using cif wrapper. This is needed to work
	 * around OBP incorrect handling of this MMU register.
	 */
	kcontextreg = 0;

	/*
	 * reset cpu_ready_set so x_calls work properly
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		if (cpr_debug & LEVEL1) {
			/* have the prom store its stdout ihandle in tmpout */
			prom_interpret("stdout @ swap l!", (uintptr_t)&tmpout,
			    0, 0, 0, 0);
			str = "MP startup...\r\n";
			(void) prom_write(tmpout, str, strlen(str), 0, 0);
		}

		mutex_enter(&cpu_lock);
		/*
		 * All of the slave cpus are not ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * clear cpu_flags and mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		/* every cpu is marked before any of them is restarted */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		pause_cpus(NULL);
		mutex_exit(&cpu_lock);

		if (cpr_debug & LEVEL1) {
			str = "MP paused...\r\n";
			(void) prom_write(tmpout, str, strlen(str), 0, 0);
		}

		/* run the tlb cleanup on the cpus in cpu_ready_set */
		i_cpr_xcall(i_cpr_clear_entries);
	} else
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);

	/* allow setting page size codes in MMU primary context register */
	kcontextreg = kctx;
}
303 
304 
/*
 * end marker for jumpback page;
 * this symbol is used to check the size of i_cpr_resume_setup()
 * and the above text.  For simplicity, the Makefile needs to
 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
 *
 * the body is intentionally empty: only the address of this function
 * matters, and i_cpr_save_machdep_info() compares it against the
 * address of i_cpr_resume_setup() to confirm both fall in one page.
 */
void
i_cpr_end_jumpback(void)
{
}
315 
316 
317 /*
318  * scan tlb entries with reader; when valid entries are found,
319  * the filter routine will selectively save/clear them
320  */
321 static void
322 i_cpr_scan_tlb(cti_t *ctip)
323 {
324 	uint64_t va_tag;
325 	int tlb_index;
326 	tte_t tte;
327 
328 	for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
329 		(*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
330 		if (va_tag && TTE_IS_VALID(&tte))
331 			(*ctip->filter)(tlb_index, &tte, va_tag, ctip);
332 	}
333 }
334 
335 
336 /*
337  * filter for locked tlb entries that reference the text/data nucleus
338  * and any bigktsb's; these will be reinstalled by cprboot on all cpus
339  */
340 /* ARGSUSED */
341 static void
342 i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
343 {
344 	cti_t *ctip;
345 
346 	/*
347 	 * record tlb data at ctip->dst; the target tlb index starts
348 	 * at the highest tlb offset and moves towards 0.  the prom
349 	 * reserves both dtlb and itlb index 0.  any selected entry
350 	 * also gets marked to prevent being flushed during resume
351 	 */
352 	if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
353 	    va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
354 		ctip = ctrans;
355 		while ((1 << ctip->index) & ctip->skip)
356 			ctip->index--;
357 		ASSERT(ctip->index > 0);
358 		ASSERT(ctip->dst < ctip->tail);
359 		ctip->dst->tte.ll = ttep->ll;
360 		ctip->dst->va_tag = va_tag;
361 		ctip->dst->index = ctip->index--;
362 		ctip->dst->tmp = 0;
363 		ctip->dst++;
364 	}
365 }
366 
367 
368 /*
369  * some tlb entries are stale, filter for unlocked entries
370  * within the prom virt range and clear them
371  */
372 static void
373 i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
374 {
375 	sutlb_t clr;
376 	cti_t *ctip;
377 
378 	if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
379 		ctip = ctrans;
380 		bzero(&clr, sizeof (clr));
381 		(*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
382 	}
383 }
384 
385 
386 /*
387  * some of the entries installed by cprboot are needed only on a
388  * short-term basis and need to be flushed to avoid clogging the tlbs.
389  * scan the dtte/itte arrays for items marked as temporary and clear
390  * dtlb/itlb entries using wrfunc.
391  */
392 static void
393 i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
394 {
395 	sutlb_t clr, *tail;
396 
397 	bzero(&clr, sizeof (clr));
398 	for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
399 		if (listp->tmp)
400 			(*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
401 	}
402 }
403 
404 
405 /* ARGSUSED */
406 static void
407 i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
408 {
409 	extern void demap_all(void);
410 	cti_t cti;
411 
412 	i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
413 	i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);
414 
415 	/*
416 	 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
417 	 * a second label for vtag_flushall.  the call is made using
418 	 * vtag_flushall() instead of demap_all() due to runtime and
419 	 * krtld results with both older and newer cpu modules.
420 	 */
421 	if (&demap_all != 0) {
422 		vtag_flushall();
423 		return;
424 	}
425 
426 	/*
427 	 * for older V9 cpus, scan tlbs and clear stale entries
428 	 */
429 	bzero(&cti, sizeof (cti));
430 	cti.filter = i_cpr_ufw;
431 
432 	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
433 	cti.reader = dtlb_rd_entry;
434 	cti.writer = dtlb_wr_entry;
435 	i_cpr_scan_tlb(&cti);
436 
437 	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
438 	cti.reader = itlb_rd_entry;
439 	cti.writer = itlb_wr_entry;
440 	i_cpr_scan_tlb(&cti);
441 }
442 
443 
444 /*
445  * craft tlb info for tmp use during resume; this data gets used by
446  * cprboot to install tlb entries.  we also mark each struct as tmp
447  * so those tlb entries will get flushed after switching to the kernel
448  * trap table.  no data needs to be recorded for vaddr when it falls
449  * within the nucleus since we've already recorded nucleus ttes and
450  * a 8K tte would conflict with a 4MB tte.  eg: the cpr module
451  * text/data may have been loaded into the text/data nucleus.
452  */
453 static void
454 i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
455 {
456 	pfn_t ppn;
457 	uint_t rw;
458 
459 	if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
460 		return;
461 
462 	while ((1 << ctip->index) & ctip->skip)
463 		ctip->index--;
464 	ASSERT(ctip->index > 0);
465 	ASSERT(ctip->dst < ctip->tail);
466 
467 	/*
468 	 * without any global service available to lookup
469 	 * a tte by vaddr, we craft our own here:
470 	 */
471 	ppn = va_to_pfn(vaddr);
472 	rw = (nbase == datava) ? TTE_HWWR_INT : 0;
473 	ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
474 	ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
475 	    TTE_CP_INT | TTE_PRIV_INT | rw;
476 	ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
477 	ctip->dst->index = ctip->index--;
478 	ctip->dst->tmp = 1;
479 	ctip->dst++;
480 }
481 
482 
483 static void
484 i_cpr_xcall(xcfunc_t func)
485 {
486 	uint_t pil, reset_pil;
487 
488 	pil = getpil();
489 	if (pil < XCALL_PIL)
490 		reset_pil = 0;
491 	else {
492 		reset_pil = 1;
493 		setpil(XCALL_PIL - 1);
494 	}
495 	xc_some(cpu_ready_set, func, 0, 0);
496 	if (reset_pil)
497 		setpil(pil);
498 }
499 
500 
501 /*
502  * restart paused slave cpus
503  */
504 void
505 i_cpr_machdep_setup(void)
506 {
507 	if (ncpus > 1) {
508 		DEBUG1(errp("MP restarted...\n"));
509 		mutex_enter(&cpu_lock);
510 		start_cpus();
511 		mutex_exit(&cpu_lock);
512 	}
513 }
514 
515 
/*
 * Stop all interrupt activities in the system
 *
 * done by calling spl7(); the previous level it returns is discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
524 
/*
 * Set machine up to take interrupts
 *
 * done by calling spl0(); the previous level it returns is discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
533 
534 
535 /*
536  * record cpu nodes and ids
537  */
538 static void
539 i_cpr_save_cpu_info(void)
540 {
541 	struct sun4u_cpu_info *scip;
542 	cpu_t *cp;
543 
544 	scip = m_info.sci;
545 	cp = CPU;
546 	do {
547 		ASSERT(scip < &m_info.sci[NCPU]);
548 		scip->cpu_id = cp->cpu_id;
549 		scip->node = cpunodes[cp->cpu_id].nodeid;
550 		scip++;
551 	} while ((cp = cp->cpu_next) != CPU);
552 }
553 
554 
555 /*
556  * Write necessary machine dependent information to cpr state file,
557  * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
558  */
559 int
560 i_cpr_write_machdep(vnode_t *vp)
561 {
562 	extern uint_t getpstate(), getwstate();
563 	extern uint_t i_cpr_tstack_size;
564 	const char ustr[] = ": unix-tte 2drop false ;";
565 	uintptr_t tinfo;
566 	label_t *ltp;
567 	cmd_t cmach;
568 	char *fmt;
569 	int rc;
570 
571 	/*
572 	 * ustr[] is used as temporary forth words during
573 	 * slave startup sequence, see sfmmu_mp_startup()
574 	 */
575 
576 	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
577 	cmach.md_size = sizeof (m_info) + sizeof (ustr);
578 
579 	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
580 		cpr_err(CE_WARN, "Failed to write descriptor.");
581 		return (rc);
582 	}
583 
584 	/*
585 	 * m_info is now cleared in i_cpr_dump_setup()
586 	 */
587 	m_info.ksb = (uint32_t)STACK_BIAS;
588 	m_info.kpstate = (uint16_t)getpstate();
589 	m_info.kwstate = (uint16_t)getwstate();
590 	DEBUG1(errp("stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
591 	    m_info.ksb, m_info.kpstate, m_info.kwstate));
592 
593 	ltp = &ttolwp(curthread)->lwp_qsav;
594 	m_info.qsav_pc = (cpr_ext)ltp->val[0];
595 	m_info.qsav_sp = (cpr_ext)ltp->val[1];
596 
597 	/*
598 	 * Set secondary context to INVALID_CONTEXT to force the HAT
599 	 * to re-setup the MMU registers and locked TTEs it needs for
600 	 * TLB miss handling.
601 	 */
602 	m_info.mmu_ctx_sec = INVALID_CONTEXT;
603 	m_info.mmu_ctx_pri = KCONTEXT;
604 
605 	tinfo = (uintptr_t)curthread;
606 	m_info.thrp = (cpr_ptr)tinfo;
607 
608 	tinfo = (uintptr_t)i_cpr_resume_setup;
609 	m_info.func = (cpr_ptr)tinfo;
610 
611 	/*
612 	 * i_cpr_data_page is comprised of a 4K stack area and a few
613 	 * trailing data symbols; the page is shared by the prom and
614 	 * kernel during resume.  the stack size is recorded here
615 	 * and used by cprboot to set %sp
616 	 */
617 	tinfo = (uintptr_t)&i_cpr_data_page;
618 	m_info.tmp_stack = (cpr_ptr)tinfo;
619 	m_info.tmp_stacksize = i_cpr_tstack_size;
620 
621 	m_info.test_mode = cpr_test_mode;
622 
623 	i_cpr_save_cpu_info();
624 
625 	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
626 		cpr_err(CE_WARN, "Failed to write machdep info.");
627 		return (rc);
628 	}
629 
630 	fmt = "error writing %s forth info";
631 	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
632 		cpr_err(CE_WARN, fmt, "unix-tte");
633 
634 	return (rc);
635 }
636 
637 
638 /*
639  * Save miscellaneous information which needs to be written to the
640  * state file.  This information is required to re-initialize
641  * kernel/prom handshaking.
642  */
643 void
644 i_cpr_save_machdep_info(void)
645 {
646 	DEBUG5(errp("jumpback size = 0x%lx\n",
647 	    (uintptr_t)&i_cpr_end_jumpback -
648 	    (uintptr_t)i_cpr_resume_setup));
649 
650 	/*
651 	 * Verify the jumpback code all falls in one page.
652 	 */
653 	if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
654 	    ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
655 		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
656 }
657 
658 
/*
 * intentionally empty on this platform
 */
void
i_cpr_set_tbr(void)
{
}
663 
664 
/*
 * cpu0 should contain bootcpu info
 *
 * returns the address of the global cpu0 structure
 */
cpu_t *
i_cpr_bootcpu(void)
{
	return (&cpu0);
}
673 
674 
675 /*
676  * Return the virtual address of the mapping area
677  */
678 caddr_t
679 i_cpr_map_setup(void)
680 {
681 	/*
682 	 * Allocate a virtual memory range spanned by an hmeblk.
683 	 * This would be 8 hments or 64k bytes.  Starting VA
684 	 * must be 64k (8-page) aligned.
685 	 */
686 	cpr_vaddr = vmem_xalloc(heap_arena,
687 	    mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
688 	    0, 0, NULL, NULL, VM_NOSLEEP);
689 	return (cpr_vaddr);
690 }
691 
692 /*
693  * create tmp locked tlb entries for a group of phys pages;
694  *
695  * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
696  * otherwise would fill up a tlb with locked entries
697  */
698 void
699 i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
700 {
701 	tte_t tte;
702 	extern pfn_t curthreadpfn;
703 	extern int curthreadremapped;
704 
705 	curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
706 
707 	for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
708 		tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
709 		tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
710 		    TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
711 		sfmmu_dtlb_ld(vaddr, KCONTEXT, &tte);
712 	}
713 }
714 
715 void
716 i_cpr_mapout(caddr_t vaddr, uint_t pages)
717 {
718 	extern int curthreadremapped;
719 
720 	if (curthreadremapped && vaddr <= (caddr_t)curthread &&
721 	    (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
722 		curthreadremapped = 0;
723 
724 	for (; pages--; vaddr += MMU_PAGESIZE)
725 		vtag_flushpage(vaddr, KCONTEXT);
726 }
727 
728 /*
729  * We're done using the mapping area; release virtual space
730  */
731 void
732 i_cpr_map_destroy(void)
733 {
734 	vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
735 	cpr_vaddr = NULL;
736 }
737 
/*
 * intentionally empty on this platform; flag is ignored
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
743 
744 
745 /*
746  * This function takes care of pages which are not in kas or need to be
747  * taken care of in a special way.  For example, panicbuf pages are not
748  * in kas and their pages are allocated via prom_retain().
749  */
750 pgcnt_t
751 i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
752 {
753 	struct cpr_map_info *pri, *tail;
754 	pgcnt_t pages, total = 0;
755 	pfn_t pfn;
756 
757 	/*
758 	 * Save information about prom retained panicbuf pages
759 	 */
760 	if (bitfunc == cpr_setbit) {
761 		pri = &cpr_prom_retain[CPR_PANICBUF];
762 		pri->virt = (cpr_ptr)panicbuf;
763 		pri->phys = va_to_pa(panicbuf);
764 		pri->size = sizeof (panicbuf);
765 	}
766 
767 	/*
768 	 * Go through the prom_retain array to tag those pages.
769 	 */
770 	tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
771 	for (pri = cpr_prom_retain; pri < tail; pri++) {
772 		pages = mmu_btopr(pri->size);
773 		for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
774 			if (pf_is_memory(pfn)) {
775 				if (bitfunc == cpr_setbit) {
776 					if ((*bitfunc)(pfn, mapflag) == 0)
777 						total++;
778 				} else
779 					total++;
780 			}
781 		}
782 	}
783 
784 	return (total);
785 }
786 
787 
/*
 * Free up memory-related resources here.  We start by freeing buffers
 * allocated during suspend initialization.  Also, free up the mapping
 * resources allocated in cpr_init().
 *
 * In order: the saved prom page buffers (CPR_PROM_FREE), the mapping
 * area, then the sensitive page storage.  The status returned by
 * i_cpr_prom_pages() is deliberately ignored.
 */
void
i_cpr_free_memory_resources(void)
{
	(void) i_cpr_prom_pages(CPR_PROM_FREE);
	i_cpr_map_destroy();
	i_cpr_storage_free();
}
800 
801 
802 /*
803  * Derived from cpr_write_statefile().
804  * Save the sensitive pages to the storage area and do bookkeeping
805  * using the sensitive descriptors. Each descriptor will contain no more
806  * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
807  * of pages that statefile gets written to disk at each write.
808  * XXX The CPR_MAXCONTIG can be changed to the size of the compression
809  * scratch area.
810  */
811 static int
812 i_cpr_save_to_storage(void)
813 {
814 	sensitive_size_saved = 0;
815 	sensitive_pages_saved = 0;
816 	sensitive_write_ptr = i_cpr_storage_data_base;
817 	return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
818 }
819 
820 
/*
 * This routine allocates space to save the sensitive kernel pages,
 * i.e. kernel data nucleus, kvalloc and kvseg segments.
 * It's assumed that those segments are the only areas that can be
 * contaminated by memory allocations during statefile dumping.
 * The space allocated here contains:
 * 	A list of descriptors describing the saved sensitive pages.
 * 	The storage area for saving the compressed sensitive kernel pages.
 * Since storage pages are allocated from segkmem, they need to be
 * excluded when saving.
 *
 * Up to MAX_STORAGE_ALLOC_RETRY attempts are made.  The value left in
 * `error' by one attempt selects what the next one reallocates: ENOMEM
 * grows the data area, -1 grows the descriptor array.
 *
 * Returns 0 on success; ENOMEM when the data area cannot be allocated
 * or the retries are exhausted; otherwise the error reported by
 * i_cpr_storage_desc_alloc().
 */
int
i_cpr_save_sensitive_kpages(void)
{
	static const char pages_fmt[] = "\n%s %s allocs\n"
	    "	spages %ld, vpages %ld, diff %ld\n";
	int retry_cnt;
	int error = 0;
	pgcnt_t pages, spages, vpages;
	caddr_t	addr;
	char *str;

	/*
	 * Tag sensitive kpages. Allocate space for storage descriptors
	 * and storage data area based on the resulting bitmaps.
	 * Note: The storage space will be part of the sensitive
	 * segment, so we need to tag kpages here before the storage
	 * is actually allocated just so their space won't be accounted
	 * for. They will not be part of the statefile although those
	 * pages will be claimed by cprboot.
	 */
	cpr_clear_bitmaps();

	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
	/* this estimate is what every data-area sizing below is based on */
	pages = spages - vpages;

	str = "i_cpr_save_sensitive_kpages:";
	DEBUG7(errp(pages_fmt, "before", str, spages, vpages, pages));

	/*
	 * Allocate space to save the clean sensitive kpages
	 */
	for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
		/*
		 * Alloc on first pass or realloc if we are retrying because
		 * of insufficient storage for sensitive pages
		 */
		if (retry_cnt == 0 || error == ENOMEM) {
			/* release any earlier data area before resizing */
			if (i_cpr_storage_data_base) {
				kmem_free(i_cpr_storage_data_base,
				    mmu_ptob(i_cpr_storage_data_sz));
				i_cpr_storage_data_base = NULL;
				i_cpr_storage_data_sz = 0;
			}
			addr = i_cpr_storage_data_alloc(pages,
			    &i_cpr_storage_data_sz, retry_cnt);
			if (addr == NULL) {
				DEBUG7(errp(
				    "\n%s can't allocate data storage space!\n",
				    str));
				return (ENOMEM);
			}
			i_cpr_storage_data_base = addr;
			i_cpr_storage_data_end =
			    addr + mmu_ptob(i_cpr_storage_data_sz);
		}

		/*
		 * Allocate on first pass, only realloc if retry is because of
		 * insufficient descriptors, but reset contents on each pass
		 * (desc_alloc resets contents as well)
		 */
		if (retry_cnt == 0 || error == -1) {
			error = i_cpr_storage_desc_alloc(
			    &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
			    &i_cpr_storage_desc_end, retry_cnt);
			if (error != 0)
				return (error);
		} else {
			i_cpr_storage_desc_init(i_cpr_storage_desc_base,
			    i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
		}

		/*
		 * We are ready to save the sensitive kpages to storage.
		 * We cannot trust what's tagged in the bitmaps anymore
		 * after storage allocations.  Clear up the bitmaps and
		 * retag the sensitive kpages again.  The storage pages
		 * should be untagged.
		 */
		cpr_clear_bitmaps();

		spages =
		    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
		vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);

		DEBUG7(errp(pages_fmt, "after ", str,
		    spages, vpages, spages - vpages));

		/*
		 * Returns 0 on success, -1 if too few descriptors, and
		 * ENOMEM if not enough space to save sensitive pages
		 */
		DEBUG1(errp("compressing pages to storage...\n"));
		error = i_cpr_save_to_storage();
		if (error == 0) {
			/* Saving to storage succeeded */
			DEBUG1(errp("compressed %d pages\n",
			    sensitive_pages_saved));
			break;
		} else if (error == -1)
			DEBUG1(errp("%s too few descriptors\n", str));
	}
	/* callers only ever see errno values, never the internal -1 */
	if (error == -1)
		error = ENOMEM;
	return (error);
}
939 
940 
941 /*
942  * Estimate how much memory we will need to save
943  * the sensitive pages with compression.
944  */
945 static caddr_t
946 i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
947 {
948 	pgcnt_t alloc_pcnt, last_pcnt;
949 	caddr_t addr;
950 	char *str;
951 
952 	str = "i_cpr_storage_data_alloc:";
953 	if (retry_cnt == 0) {
954 		/*
955 		 * common compression ratio is about 3:1
956 		 * initial storage allocation is estimated at 40%
957 		 * to cover the majority of cases
958 		 */
959 		alloc_pcnt = INITIAL_ALLOC_PCNT;
960 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
961 		DEBUG7(errp("%s sensitive pages: %ld\n", str, pages));
962 		DEBUG7(errp("%s initial est pages: %ld, alloc %ld%%\n",
963 		    str, *alloc_pages, alloc_pcnt));
964 	} else {
965 		/*
966 		 * calculate the prior compression percentage (x100)
967 		 * from the last attempt to save sensitive pages
968 		 */
969 		ASSERT(sensitive_pages_saved != 0);
970 		last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
971 		    sensitive_pages_saved;
972 		DEBUG7(errp("%s last ratio %ld%%\n", str, last_pcnt));
973 
974 		/*
975 		 * new estimated storage size is based on
976 		 * the larger ratio + 5% for each retry:
977 		 * pages * (last + [5%, 10%])
978 		 */
979 		alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
980 		    (retry_cnt * 5);
981 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
982 		DEBUG7(errp("%s Retry est pages: %ld, alloc %ld%%\n",
983 		    str, *alloc_pages, alloc_pcnt));
984 	}
985 
986 	addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
987 	DEBUG7(errp("%s alloc %ld pages\n", str, *alloc_pages));
988 	return (addr);
989 }
990 
991 
992 void
993 i_cpr_storage_free(void)
994 {
995 	/* Free descriptors */
996 	if (i_cpr_storage_desc_base) {
997 		kmem_free(i_cpr_storage_desc_base,
998 		    mmu_ptob(i_cpr_storage_desc_pgcnt));
999 		i_cpr_storage_desc_base = NULL;
1000 		i_cpr_storage_desc_pgcnt = 0;
1001 	}
1002 
1003 
1004 	/* Data storage */
1005 	if (i_cpr_storage_data_base) {
1006 		kmem_free(i_cpr_storage_data_base,
1007 		    mmu_ptob(i_cpr_storage_data_sz));
1008 		i_cpr_storage_data_base = NULL;
1009 		i_cpr_storage_data_sz = 0;
1010 	}
1011 }
1012 
1013 
/*
 * This routine is derived from cpr_compress_and_write().
 * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
 * 2. Compress and save the clean sensitive pages into the storage area.
 *
 * chunks: 1-based ordinal of this chunk; selects the descriptor slot
 * spfn:   first pfn of the contiguous chunk
 * pages:  number of pages in the chunk
 *
 * Returns 0 on success, -1 when the descriptor array is exhausted, and
 * ENOMEM when the storage area cannot hold the result.
 */
int
i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
{
	extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
	extern caddr_t i_cpr_storage_data_end;
	uint_t remaining, datalen;
	uint32_t test_usum;
	char *datap;
	csd_t *descp;
	cpd_t cpd;
	int error;

	/*
	 * Fill next empty storage descriptor
	 */
	descp = i_cpr_storage_desc_base + chunks - 1;
	if (descp >= i_cpr_storage_desc_end) {
		DEBUG1(errp("ran out of descriptors, base 0x%p, chunks %d, "
		    "end 0x%p, descp 0x%p\n", i_cpr_storage_desc_base, chunks,
		    i_cpr_storage_desc_end, descp));
		return (-1);
	}
	/* the slot is expected to still hold the (uint_t)-1 marker */
	ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
	i_cpr_storage_desc_last_used = descp;

	descp->csd_dirty_spfn = spfn;
	descp->csd_dirty_npages = pages;

	/* map the chunk at the cpr mapping area; undone before returning */
	i_cpr_mapin(CPR->c_mapping_area, pages, spfn);

	/*
	 * try compressing pages and copy cpd fields
	 * pfn is copied for debug use
	 */
	cpd.cpd_pfn = spfn;
	datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
	datalen = cpd.cpd_length;
	descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
#ifdef DEBUG
	descp->csd_usum = cpd.cpd_usum;
	descp->csd_csum = cpd.cpd_csum;
#endif

	error = 0;

	/*
	 * Save the raw or compressed data to the storage area pointed to by
	 * sensitive_write_ptr. Make sure the storage space is big enough to
	 * hold the result. Otherwise roll back to increase the storage space.
	 */
	descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
	descp->csd_clean_sz = datalen;
	/* strict compare: data ending exactly at the end is treated as full */
	if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
		extern	void cprbcopy(void *, void *, size_t);

		cprbcopy(datap, sensitive_write_ptr, datalen);
		sensitive_size_saved += datalen;
		sensitive_pages_saved += descp->csd_dirty_npages;
		sensitive_write_ptr += datalen;
	} else {
		remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
		DEBUG1(errp("i_cpr_compress_and_save: The storage "
		    "space is too small!\ngot %d, want %d\n\n",
		    remaining, (remaining + datalen)));
#ifdef	DEBUG
		/*
		 * Check to see if the content of the sensitive pages that we
		 * just copied have changed during this small time window.
		 */
		test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
		descp->csd_usum = cpd.cpd_usum;
		if (test_usum != descp->csd_usum) {
			DEBUG1(errp("\nWARNING: i_cpr_compress_and_save: "
			    "Data in the range of pfn 0x%lx to pfn "
			    "0x%lx has changed after they are saved "
			    "into storage.", spfn, (spfn + pages - 1)));
		}
#endif
		error = ENOMEM;
	}

	i_cpr_mapout(CPR->c_mapping_area, pages);
	return (error);
}
1103 
1104 
1105 /*
1106  * This routine is derived from cpr_count_kpages().
1107  * It goes through kernel data nucleus and segkmem segments to select
1108  * pages in use and mark them in the corresponding bitmap.
1109  */
1110 pgcnt_t
1111 i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
1112 {
1113 	pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1114 	extern caddr_t e_moddata;
1115 	extern struct seg kvalloc;
1116 	extern struct seg kmem64;
1117 	size_t size;
1118 
1119 	/*
1120 	 * Kernel data nucleus pages
1121 	 */
1122 	size = e_moddata - s_data;
1123 	kdata_cnt += cpr_count_pages(s_data, size,
1124 	    mapflag, bitfunc, DBG_SHOWRANGE);
1125 
1126 	/*
1127 	 * kvseg and kvalloc pages
1128 	 */
1129 	segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1130 	segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1131 	    mapflag, bitfunc, DBG_SHOWRANGE);
1132 
1133 	/* segment to support kernel memory usage above 32-bit space (4GB) */
1134 	if (kmem64.s_base)
1135 		segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1136 		    mapflag, bitfunc, DBG_SHOWRANGE);
1137 
1138 	DEBUG7(errp("\ni_cpr_count_sensitive_kpages:\n"
1139 	    "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1140 	    kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt));
1141 
1142 	return (kdata_cnt + segkmem_cnt);
1143 }
1144 
1145 
1146 pgcnt_t
1147 i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1148 {
1149 	pgcnt_t count = 0;
1150 
1151 	if (i_cpr_storage_desc_base) {
1152 		count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1153 		    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1154 		    mapflag, bitfunc, DBG_SHOWRANGE);
1155 	}
1156 	if (i_cpr_storage_data_base) {
1157 		count += cpr_count_pages(i_cpr_storage_data_base,
1158 		    (size_t)mmu_ptob(i_cpr_storage_data_sz),
1159 		    mapflag, bitfunc, DBG_SHOWRANGE);
1160 	}
1161 	return (count);
1162 }
1163 
1164 
1165 /*
1166  * Derived from cpr_write_statefile().
1167  * Allocate (or reallocate after exhausting the supply) descriptors for each
1168  * chunk of contiguous sensitive kpages.
1169  */
1170 static int
1171 i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1172     int retry)
1173 {
1174 	pgcnt_t npages;
1175 	int chunks;
1176 	csd_t	*descp, *end;
1177 	size_t	len;
1178 	char *str = "i_cpr_storage_desc_alloc:";
1179 
1180 	/*
1181 	 * On initial allocation, add some extra to cover overhead caused
1182 	 * by the allocation for the storage area later.
1183 	 */
1184 	if (retry == 0) {
1185 		chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1186 		    EXTRA_DESCS;
1187 		npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1188 		DEBUG7(errp("%s chunks %d, ", str, chunks));
1189 	} else {
1190 		DEBUG7(errp("%s retry %d: ", str, retry));
1191 		npages = *pgsp + 1;
1192 	}
1193 	/* Free old descriptors, if any */
1194 	if (*basepp)
1195 		kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1196 
1197 	descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1198 	if (descp == NULL) {
1199 		DEBUG7(errp("%s no space for descriptors!\n", str));
1200 		return (ENOMEM);
1201 	}
1202 
1203 	*pgsp = npages;
1204 	len = mmu_ptob(npages);
1205 	end = *endpp = descp + (len / (sizeof (**basepp)));
1206 	DEBUG7(errp("npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1207 	    "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1208 	    *basepp, *endpp));
1209 	i_cpr_storage_desc_init(descp, npages, end);
1210 	return (0);
1211 }
1212 
1213 static void
1214 i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1215 {
1216 	size_t	len = mmu_ptob(npages);
1217 
1218 	/* Initialize the descriptors to something impossible. */
1219 	bzero(descp, len);
1220 #ifdef	DEBUG
1221 	/*
1222 	 * This condition is tested by an ASSERT
1223 	 */
1224 	for (; descp < end; descp++)
1225 		descp->csd_dirty_spfn = (uint_t)-1;
1226 #endif
1227 }
1228 
1229 int
1230 i_cpr_dump_sensitive_kpages(vnode_t *vp)
1231 {
1232 	int	error = 0;
1233 	uint_t	spin_cnt = 0;
1234 	csd_t	*descp;
1235 
1236 	/*
1237 	 * These following two variables need to be reinitialized
1238 	 * for each cpr cycle.
1239 	 */
1240 	i_cpr_sensitive_bytes_dumped = 0;
1241 	i_cpr_sensitive_pgs_dumped = 0;
1242 
1243 	if (i_cpr_storage_desc_base) {
1244 		for (descp = i_cpr_storage_desc_base;
1245 		    descp <= i_cpr_storage_desc_last_used; descp++) {
1246 			if (error = cpr_dump_sensitive(vp, descp))
1247 				return (error);
1248 			spin_cnt++;
1249 			if ((spin_cnt & 0x5F) == 1)
1250 				cpr_spinning_bar();
1251 		}
1252 		prom_printf(" \b");
1253 	}
1254 
1255 	DEBUG7(errp("\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1256 	    i_cpr_sensitive_pgs_dumped));
1257 	return (0);
1258 }
1259 
1260 
1261 /*
1262  * 1. Fill the cpr page descriptor with the info of the dirty pages
1263  *    and
1264  *    write the descriptor out. It will be used at resume.
1265  * 2. Write the clean data in stead of the dirty data out.
1266  *    Note: to save space, the clean data is already compressed.
1267  */
1268 static int
1269 cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1270 {
1271 	int error = 0;
1272 	caddr_t datap;
1273 	cpd_t cpd;	/* cpr page descriptor */
1274 	pfn_t	dirty_spfn;
1275 	pgcnt_t dirty_npages;
1276 	size_t clean_sz;
1277 	caddr_t	clean_sva;
1278 	int	clean_compressed;
1279 	extern uchar_t cpr_pagecopy[];
1280 
1281 	dirty_spfn = descp->csd_dirty_spfn;
1282 	dirty_npages = descp->csd_dirty_npages;
1283 	clean_sva = (caddr_t)descp->csd_clean_sva;
1284 	clean_sz = descp->csd_clean_sz;
1285 	clean_compressed = descp->csd_clean_compressed;
1286 
1287 	/* Fill cpr page descriptor. */
1288 	cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1289 	cpd.cpd_pfn = dirty_spfn;
1290 	cpd.cpd_flag = 0;  /* must init to zero */
1291 	cpd.cpd_pages = dirty_npages;
1292 
1293 #ifdef	DEBUG
1294 	if ((cpd.cpd_usum = descp->csd_usum) != 0)
1295 		cpd.cpd_flag |= CPD_USUM;
1296 	if ((cpd.cpd_csum = descp->csd_csum) != 0)
1297 		cpd.cpd_flag |= CPD_CSUM;
1298 #endif
1299 
1300 	STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1301 
1302 	/*
1303 	 * The sensitive kpages are usually saved with compression
1304 	 * unless compression could not reduce the size of the data.
1305 	 * If user choose not to have the statefile compressed,
1306 	 * we need to decompress the data back before dumping it to disk.
1307 	 */
1308 	if (CPR->c_flags & C_COMPRESSING) {
1309 		cpd.cpd_length = clean_sz;
1310 		datap = clean_sva;
1311 		if (clean_compressed)
1312 			cpd.cpd_flag |= CPD_COMPRESS;
1313 	} else {
1314 		if (clean_compressed) {
1315 			cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1316 			    clean_sz, mmu_ptob(dirty_npages));
1317 			datap = (caddr_t)cpr_pagecopy;
1318 			ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1319 		} else {
1320 			cpd.cpd_length = clean_sz;
1321 			datap = clean_sva;
1322 		}
1323 		cpd.cpd_csum = 0;
1324 	}
1325 
1326 	/* Write cpr page descriptor */
1327 	error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1328 	if (error) {
1329 		DEBUG7(errp("descp: %p\n", descp));
1330 #ifdef DEBUG
1331 		debug_enter("cpr_dump_sensitive: cpr_write() page "
1332 			"descriptor failed!\n");
1333 #endif
1334 		return (error);
1335 	}
1336 
1337 	i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1338 
1339 	/* Write page data */
1340 	error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1341 	if (error) {
1342 		DEBUG7(errp("error: %x\n", error));
1343 		DEBUG7(errp("descp: %p\n", descp));
1344 		DEBUG7(errp("cpr_write(%p, %p , %lx)\n", vp, datap,
1345 			cpd.cpd_length));
1346 #ifdef DEBUG
1347 		debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1348 #endif
1349 		return (error);
1350 	}
1351 
1352 	i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1353 	i_cpr_sensitive_pgs_dumped += dirty_npages;
1354 
1355 	return (error);
1356 }
1357 
1358 
1359 /*
1360  * Sanity check to make sure that we have dumped right amount
1361  * of pages from different sources to statefile.
1362  */
1363 int
1364 i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1365 {
1366 	uint_t total_pgs_dumped;
1367 
1368 	total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1369 
1370 	DEBUG7(errp("\ncheck_pgs: reg %d + sens %ld = %d, expect %d\n\n",
1371 	    regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1372 	    total_pgs_dumped, pgs_expected));
1373 
1374 	if (pgs_expected == total_pgs_dumped)
1375 		return (0);
1376 
1377 	return (EINVAL);
1378 }
1379 
1380 
1381 int
1382 i_cpr_reusefini(void)
1383 {
1384 	struct vnode *vp;
1385 	cdef_t *cdef;
1386 	size_t size;
1387 	char *bufp;
1388 	int rc;
1389 
1390 	if (cpr_reusable_mode)
1391 		cpr_reusable_mode = 0;
1392 
1393 	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
1394 		if (rc == EROFS) {
1395 			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
1396 			    "(uadmin %d %d)\nmust be done with / mounted "
1397 			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
1398 		}
1399 		return (rc);
1400 	}
1401 
1402 	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
1403 	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));
1404 
1405 	if (rc) {
1406 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1407 		    cpr_default_path, rc);
1408 	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
1409 		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
1410 		    "prom values for %s", cpr_default_path,
1411 		    cpr_enumerate_promprops(&bufp, &size));
1412 		kmem_free(bufp, size);
1413 		rc = EINVAL;
1414 	} else {
1415 		/*
1416 		 * clean up prom properties
1417 		 */
1418 		rc = cpr_update_nvram(cdef->props);
1419 		if (rc == 0) {
1420 			/*
1421 			 * invalidate the disk copy and turn off reusable
1422 			 */
1423 			cdef->mini.magic = 0;
1424 			cdef->mini.reusable = 0;
1425 			if (rc = cpr_rdwr(UIO_WRITE, vp,
1426 			    &cdef->mini, sizeof (cdef->mini))) {
1427 				cpr_err(CE_WARN, "Failed writing %s, errno %d",
1428 				    cpr_default_path, rc);
1429 			}
1430 		}
1431 	}
1432 
1433 	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED());
1434 	VN_RELE(vp);
1435 	kmem_free(cdef, sizeof (*cdef));
1436 
1437 	return (rc);
1438 }
1439 
1440 
1441 int
1442 i_cpr_reuseinit(void)
1443 {
1444 	int rc = 0;
1445 
1446 	if (rc = cpr_default_setup(1))
1447 		return (rc);
1448 
1449 	/*
1450 	 * We need to validate default file
1451 	 */
1452 	rc = cpr_validate_definfo(1);
1453 	if (rc == 0)
1454 		cpr_reusable_mode = 1;
1455 	else if (rc == EROFS) {
1456 		cpr_err(CE_NOTE, "reuseinit must be performed "
1457 		    "while / is mounted writeable");
1458 	}
1459 
1460 	(void) cpr_default_setup(0);
1461 
1462 	return (rc);
1463 }
1464 
1465 
1466 int
1467 i_cpr_check_cprinfo(void)
1468 {
1469 	struct vnode *vp;
1470 	cmini_t mini;
1471 	int rc = 0;
1472 
1473 	if (rc = cpr_open_deffile(FREAD, &vp)) {
1474 		if (rc == ENOENT)
1475 			cpr_err(CE_NOTE, "cprinfo file does not "
1476 			    "exist.  You must run 'uadmin %d %d' "
1477 			    "command while / is mounted writeable,\n"
1478 			    "then reboot and run 'uadmin %d %d' "
1479 			    "to create a reusable statefile",
1480 			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1481 		return (rc);
1482 	}
1483 
1484 	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1485 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
1486 	VN_RELE(vp);
1487 
1488 	if (rc) {
1489 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1490 		    cpr_default_path, rc);
1491 	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
1492 		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1493 		    "You must run 'uadmin %d %d' while / is mounted "
1494 		    "writeable, then reboot and run 'uadmin %d %d' "
1495 		    "to create a reusable statefile\n",
1496 		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1497 		rc = EINVAL;
1498 	}
1499 
1500 	return (rc);
1501 }
1502 
1503 
/*
 * Report whether reusable statefiles are supported by this platform
 * implementation; this one always answers yes (1).
 */
int
i_cpr_reusable_supported(void)
{
	const int supported = 1;

	return (supported);
}
1509 
1510 
1511 /*
1512  * find prom phys pages and alloc space for a tmp copy
1513  */
1514 static int
1515 i_cpr_find_ppages(void)
1516 {
1517 	extern struct vnode prom_ppages;
1518 	struct page *pp;
1519 	struct memlist *pmem;
1520 	pgcnt_t npages, pcnt, scnt, vcnt;
1521 	pfn_t ppn, plast, *dst;
1522 	int mapflag;
1523 
1524 	cpr_clear_bitmaps();
1525 	mapflag = REGULAR_BITMAP;
1526 
1527 	/*
1528 	 * there should be a page_t for each phys page used by the kernel;
1529 	 * set a bit for each phys page not tracked by a page_t
1530 	 */
1531 	pcnt = 0;
1532 	memlist_read_lock();
1533 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1534 		npages = mmu_btop(pmem->size);
1535 		ppn = mmu_btop(pmem->address);
1536 		for (plast = ppn + npages; ppn < plast; ppn++) {
1537 			if (page_numtopp_nolock(ppn))
1538 				continue;
1539 			(void) cpr_setbit(ppn, mapflag);
1540 			pcnt++;
1541 		}
1542 	}
1543 	memlist_read_unlock();
1544 
1545 	/*
1546 	 * clear bits for phys pages in each segment
1547 	 */
1548 	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);
1549 
1550 	/*
1551 	 * set bits for phys pages referenced by the prom_ppages vnode;
1552 	 * these pages are mostly comprised of forthdebug words
1553 	 */
1554 	vcnt = 0;
1555 	for (pp = prom_ppages.v_pages; pp; ) {
1556 		if (cpr_setbit(pp->p_offset, mapflag) == 0)
1557 			vcnt++;
1558 		pp = pp->p_vpnext;
1559 		if (pp == prom_ppages.v_pages)
1560 			break;
1561 	}
1562 
1563 	/*
1564 	 * total number of prom pages are:
1565 	 * (non-page_t pages - seg pages + vnode pages)
1566 	 */
1567 	ppage_count = pcnt - scnt + vcnt;
1568 	DEBUG1(errp("find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1569 	    pcnt, scnt, vcnt, ppage_count));
1570 
1571 	/*
1572 	 * alloc array of pfn_t to store phys page list
1573 	 */
1574 	pphys_list_size = ppage_count * sizeof (pfn_t);
1575 	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
1576 	if (pphys_list == NULL) {
1577 		cpr_err(CE_WARN, "cannot alloc pphys_list");
1578 		return (ENOMEM);
1579 	}
1580 
1581 	/*
1582 	 * phys pages referenced in the bitmap should be
1583 	 * those used by the prom; scan bitmap and save
1584 	 * a list of prom phys page numbers
1585 	 */
1586 	dst = pphys_list;
1587 	memlist_read_lock();
1588 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1589 		npages = mmu_btop(pmem->size);
1590 		ppn = mmu_btop(pmem->address);
1591 		for (plast = ppn + npages; ppn < plast; ppn++) {
1592 			if (cpr_isset(ppn, mapflag)) {
1593 				ASSERT(dst < (pphys_list + ppage_count));
1594 				*dst++ = ppn;
1595 			}
1596 		}
1597 	}
1598 	memlist_read_unlock();
1599 
1600 	/*
1601 	 * allocate space to store prom pages
1602 	 */
1603 	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
1604 	if (ppage_buf == NULL) {
1605 		kmem_free(pphys_list, pphys_list_size);
1606 		pphys_list = NULL;
1607 		cpr_err(CE_WARN, "cannot alloc ppage_buf");
1608 		return (ENOMEM);
1609 	}
1610 
1611 	return (0);
1612 }
1613 
1614 
1615 /*
1616  * save prom pages to kmem pages
1617  */
1618 static void
1619 i_cpr_save_ppages(void)
1620 {
1621 	pfn_t *pphys, *plast;
1622 	caddr_t dst;
1623 
1624 	/*
1625 	 * map in each prom page and copy to a kmem page
1626 	 */
1627 	dst = ppage_buf;
1628 	plast = pphys_list + ppage_count;
1629 	for (pphys = pphys_list; pphys < plast; pphys++) {
1630 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1631 		bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1632 		i_cpr_mapout(cpr_vaddr, 1);
1633 		dst += MMU_PAGESIZE;
1634 	}
1635 
1636 	DEBUG1(errp("saved %ld prom pages\n", ppage_count));
1637 }
1638 
1639 
1640 /*
1641  * restore prom pages from kmem pages
1642  */
1643 static void
1644 i_cpr_restore_ppages(void)
1645 {
1646 	pfn_t *pphys, *plast;
1647 	caddr_t src;
1648 
1649 	dcache_flushall();
1650 
1651 	/*
1652 	 * map in each prom page and copy from a kmem page
1653 	 */
1654 	src = ppage_buf;
1655 	plast = pphys_list + ppage_count;
1656 	for (pphys = pphys_list; pphys < plast; pphys++) {
1657 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1658 		bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1659 		i_cpr_mapout(cpr_vaddr, 1);
1660 		src += MMU_PAGESIZE;
1661 	}
1662 
1663 	dcache_flushall();
1664 
1665 	DEBUG1(errp("restored %ld prom pages\n", ppage_count));
1666 }
1667 
1668 
1669 /*
1670  * save/restore prom pages or free related allocs
1671  */
1672 int
1673 i_cpr_prom_pages(int action)
1674 {
1675 	int error;
1676 
1677 	if (action == CPR_PROM_SAVE) {
1678 		if (ppage_buf == NULL) {
1679 			ASSERT(pphys_list == NULL);
1680 			if (error = i_cpr_find_ppages())
1681 				return (error);
1682 			i_cpr_save_ppages();
1683 		}
1684 	} else if (action == CPR_PROM_RESTORE) {
1685 		i_cpr_restore_ppages();
1686 	} else if (action == CPR_PROM_FREE) {
1687 		if (pphys_list) {
1688 			ASSERT(pphys_list_size);
1689 			kmem_free(pphys_list, pphys_list_size);
1690 			pphys_list = NULL;
1691 			pphys_list_size = 0;
1692 		}
1693 		if (ppage_buf) {
1694 			ASSERT(ppage_count);
1695 			kmem_free(ppage_buf, mmu_ptob(ppage_count));
1696 			DEBUG1(errp("freed %ld prom pages\n", ppage_count));
1697 			ppage_buf = NULL;
1698 			ppage_count = 0;
1699 		}
1700 	}
1701 	return (0);
1702 }
1703 
1704 
1705 /*
1706  * record tlb data for the nucleus, bigktsb's, and the cpr module;
1707  * this data is later used by cprboot to install dtlb/itlb entries.
1708  * when we jump into the cpr module during the resume phase, those
1709  * mappings are needed until switching to the kernel trap table.
1710  * to make the dtte/itte info available during resume, we need
1711  * the info recorded prior to saving sensitive pages, otherwise
1712  * all the data would appear as NULLs.
1713  */
1714 static void
1715 i_cpr_save_tlbinfo(void)
1716 {
1717 	cti_t cti;
1718 
1719 	/*
1720 	 * during resume - shortly after jumping into the cpr module,
1721 	 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
1722 	 * index used for TSBs; skip is set so that any saved tte will
1723 	 * target other tlb offsets and prevent being lost during
1724 	 * resume.  now scan the dtlb and save locked entries,
1725 	 * then add entries for the tmp stack / data page and the
1726 	 * cpr thread structure.
1727 	 */
1728 	cti.dst = m_info.dtte;
1729 	cti.tail = cti.dst + CPR_MAX_TLB;
1730 	cti.reader = dtlb_rd_entry;
1731 	cti.writer = NULL;
1732 	cti.filter = i_cpr_lnb;
1733 	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
1734 	cti.skip = (1 << utsb_dtlb_ttenum);
1735 	cti.skip |= (1 << utsb4m_dtlb_ttenum);
1736 	i_cpr_scan_tlb(&cti);
1737 	i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
1738 	i_cpr_make_tte(&cti, curthread, datava);
1739 
1740 	/*
1741 	 * scan itlb and save locked entries; add an entry for
1742 	 * the first text page of the cpr module; cprboot will
1743 	 * jump to that page after restoring kernel pages.
1744 	 */
1745 	cti.dst = m_info.itte;
1746 	cti.tail = cti.dst + CPR_MAX_TLB;
1747 	cti.reader = itlb_rd_entry;
1748 	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
1749 	cti.skip = 0;
1750 	i_cpr_scan_tlb(&cti);
1751 	i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
1752 }
1753 
1754 
1755 /* ARGSUSED */
1756 int
1757 i_cpr_dump_setup(vnode_t *vp)
1758 {
1759 	/*
1760 	 * zero out m_info and add info to dtte/itte arrays
1761 	 */
1762 	bzero(&m_info, sizeof (m_info));
1763 	i_cpr_save_tlbinfo();
1764 	return (0);
1765 }
1766 
1767 
1768 int
1769 i_cpr_is_supported(void)
1770 {
1771 	char es_prop[] = "energystar-v2";
1772 	pnode_t node;
1773 	int last;
1774 	extern int cpr_supported_override;
1775 	extern int cpr_platform_enable;
1776 
1777 	/*
1778 	 * The next statement tests if a specific platform has turned off
1779 	 * cpr support.
1780 	 */
1781 	if (cpr_supported_override)
1782 		return (0);
1783 
1784 	/*
1785 	 * Do not inspect energystar-v* property if a platform has
1786 	 * specifically turned on cpr support
1787 	 */
1788 	if (cpr_platform_enable)
1789 		return (1);
1790 
1791 	node = prom_rootnode();
1792 	if (prom_getproplen(node, es_prop) != -1)
1793 		return (1);
1794 	last = strlen(es_prop) - 1;
1795 	es_prop[last] = '3';
1796 	return (prom_getproplen(node, es_prop) != -1);
1797 }
1798 
1799 
1800 /*
1801  * the actual size of the statefile data isn't known until after all the
1802  * compressed pages are written; even the inode size doesn't reflect the
1803  * data size since there are usually many extra fs blocks.  for recording
1804  * the actual data size, the first sector of the statefile is copied to
1805  * a tmp buf, and the copy is later updated and flushed to disk.
1806  */
1807 int
1808 i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
1809 {
1810 	extern int cpr_flush_write(vnode_t *);
1811 	static char cpr_sector[DEV_BSIZE];
1812 	cpr_ext bytes, *dst;
1813 
1814 	/*
1815 	 * this routine is called after cdd_t and csu_md_t are copied
1816 	 * to cpr_buf; mini-hack alert: the save/update method creates
1817 	 * a dependency on the combined struct size being >= one sector
1818 	 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1819 	 * over 1K bytes and will probably grow with any changes.
1820 	 *
1821 	 * copy when vp is NULL, flush when non-NULL
1822 	 */
1823 	if (vp == NULL) {
1824 		ASSERT((*bufpp - base) >= DEV_BSIZE);
1825 		bcopy(base, cpr_sector, sizeof (cpr_sector));
1826 		return (0);
1827 	} else {
1828 		bytes = dbtob(*blkno);
1829 		dst = &((cdd_t *)cpr_sector)->cdd_filesize;
1830 		bcopy(&bytes, dst, sizeof (bytes));
1831 		bcopy(cpr_sector, base, sizeof (cpr_sector));
1832 		*bufpp = base + sizeof (cpr_sector);
1833 		*blkno = cpr_statefile_offset();
1834 		DEBUG1(errp("statefile data size: %ld\n\n", bytes));
1835 		return (cpr_flush_write(vp));
1836 	}
1837 }
1838 
1839 
1840 /*
1841  * Allocate bitmaps according to the phys_install list.
1842  */
1843 static int
1844 i_cpr_bitmap_setup(void)
1845 {
1846 	struct memlist *pmem;
1847 	cbd_t *dp, *tail;
1848 	void *space;
1849 	size_t size;
1850 
1851 	/*
1852 	 * The number of bitmap descriptors will be the count of
1853 	 * phys_install ranges plus 1 for a trailing NULL struct.
1854 	 */
1855 	cpr_nbitmaps = 1;
1856 	for (pmem = phys_install; pmem; pmem = pmem->next)
1857 		cpr_nbitmaps++;
1858 
1859 	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
1860 		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
1861 		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
1862 		return (EFBIG);
1863 	}
1864 
1865 	/* Alloc an array of bitmap descriptors. */
1866 	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
1867 	if (dp == NULL) {
1868 		cpr_nbitmaps = 0;
1869 		return (ENOMEM);
1870 	}
1871 	tail = dp + cpr_nbitmaps;
1872 
1873 	CPR->c_bmda = dp;
1874 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1875 		size = BITMAP_BYTES(pmem->size);
1876 		space = kmem_zalloc(size * 2, KM_NOSLEEP);
1877 		if (space == NULL)
1878 			return (ENOMEM);
1879 		ASSERT(dp < tail);
1880 		dp->cbd_magic = CPR_BITMAP_MAGIC;
1881 		dp->cbd_spfn = mmu_btop(pmem->address);
1882 		dp->cbd_epfn = mmu_btop(pmem->address + pmem->size) - 1;
1883 		dp->cbd_size = size;
1884 		dp->cbd_reg_bitmap = (cpr_ptr)space;
1885 		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
1886 		dp++;
1887 	}
1888 
1889 	/* set magic for the last descriptor */
1890 	ASSERT(dp == (tail - 1));
1891 	dp->cbd_magic = CPR_BITMAP_MAGIC;
1892 
1893 	return (0);
1894 }
1895 
1896 
1897 void
1898 i_cpr_bitmap_cleanup(void)
1899 {
1900 	cbd_t *dp;
1901 
1902 	if (CPR->c_bmda == NULL)
1903 		return;
1904 	for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1905 		kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1906 	kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1907 	CPR->c_bmda = NULL;
1908 	cpr_nbitmaps = 0;
1909 }
1910 
1911 
/*
 * Each range of physical memory gets a "regular" and a "volatile"
 * bitmap.  The volatile maps count and track pages susceptible to
 * heap corruption - caused by drivers that allocate mem during
 * VOP_DUMP(); the regular maps cover all the other, non-susceptible
 * pages.  Each bitmap pair is merged before the bitmaps are written
 * to the statefile, which simplifies handling within cprboot.
 *
 * Returns 0 or the error from i_cpr_bitmap_setup(); on error anything
 * that was partially allocated has been released.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int rc;

	/* phys_install is walked under the memlist read lock */
	memlist_read_lock();
	rc = i_cpr_bitmap_setup();
	memlist_read_unlock();

	if (rc != 0)
		i_cpr_bitmap_cleanup();

	return (rc);
}
1932