xref: /titanic_44/usr/src/uts/sun4u/os/cpr_impl.c (revision 2b4a78020b9c38d1b95e2f3fefa6d6e4be382d1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Platform specific implementation code
30  */
31 
32 #define	SUNDDI_IMPL
33 
34 #include <sys/types.h>
35 #include <sys/promif.h>
36 #include <sys/prom_isa.h>
37 #include <sys/prom_plat.h>
38 #include <sys/mmu.h>
39 #include <vm/hat_sfmmu.h>
40 #include <sys/iommu.h>
41 #include <sys/scb.h>
42 #include <sys/cpuvar.h>
43 #include <sys/intreg.h>
44 #include <sys/pte.h>
45 #include <vm/hat.h>
46 #include <vm/page.h>
47 #include <vm/as.h>
48 #include <sys/cpr.h>
49 #include <sys/kmem.h>
50 #include <sys/clock.h>
51 #include <sys/kmem.h>
52 #include <sys/panic.h>
53 #include <vm/seg_kmem.h>
54 #include <sys/cpu_module.h>
55 #include <sys/callb.h>
56 #include <sys/machsystm.h>
57 #include <sys/vmsystm.h>
58 #include <sys/systm.h>
59 #include <sys/archsystm.h>
60 #include <sys/stack.h>
61 #include <sys/fs/ufs_fs.h>
62 #include <sys/memlist.h>
63 #include <sys/bootconf.h>
64 #include <sys/thread.h>
65 #include <vm/vm_dep.h>
66 
/*
 * cpr routines defined outside this file (bitmap tagging and page
 * counting), plus the dtlb/itlb entry writers that are passed around
 * below as tlb_rw_t callbacks.
 */
extern	void cpr_clear_bitmaps(void);
extern	int cpr_setbit(pfn_t ppn, int mapflag);
extern	int cpr_clrbit(pfn_t ppn, int mapflag);
extern	pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg);
extern	pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc);
extern	void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
extern	void itlb_wr_entry(uint_t, tte_t *, uint64_t *);

/* file-local helpers, declared ahead of their first use */
static	int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
static	void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
static	caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
static	int cpr_dump_sensitive(vnode_t *, csd_t *);
static	void i_cpr_clear_entries(uint64_t, uint64_t);
static	void i_cpr_xcall(xcfunc_t);

void	i_cpr_storage_free(void);

/* data objects referenced here but not defined in this file */
extern void *i_cpr_data_page;
extern int cpr_test_mode;
extern int cpr_nbitmaps;
extern char cpr_default_path[];
extern caddr_t textva, datava;

/* regions walked by i_cpr_count_special_kpages(), e.g. panicbuf */
static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
/* mapping area: set by i_cpr_map_setup(), reset by i_cpr_map_destroy() */
caddr_t cpr_vaddr = NULL;

/* running totals maintained by i_cpr_compress_and_save() */
static	uint_t sensitive_pages_saved;
static	uint_t sensitive_size_saved;

/* bounds of the sensitive-page data storage and its descriptor array */
caddr_t	i_cpr_storage_data_base;
caddr_t	i_cpr_storage_data_end;
csd_t *i_cpr_storage_desc_base;
csd_t *i_cpr_storage_desc_end;		/* one byte beyond last used descp */
csd_t *i_cpr_storage_desc_last_used;	/* last used descriptor */
caddr_t sensitive_write_ptr;		/* position for next storage write */

size_t	i_cpr_sensitive_bytes_dumped;
pgcnt_t	i_cpr_sensitive_pgs_dumped;
pgcnt_t	i_cpr_storage_data_sz;		/* in pages */
pgcnt_t	i_cpr_storage_desc_pgcnt;	/* in pages */

ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
/* machine-dependent record written out by i_cpr_write_machdep() */
static	csu_md_t m_info;
110 
111 
#define	MAX_STORAGE_RETRY	3
/* bounds the alloc/save loop in i_cpr_save_sensitive_kpages() */
#define	MAX_STORAGE_ALLOC_RETRY	3
#define	INITIAL_ALLOC_PCNT	40	/* starting allocation percentage */
#define	INTEGRAL		100	/* to get 1% precision */

#define	EXTRA_RATE		2	/* add EXTRA_RATE% extra space */
#define	EXTRA_DESCS		10

#define	CPR_NO_STORAGE_DESC	1
#define	CPR_NO_STORAGE_DATA	2

/* actions accepted by i_cpr_cif_setup() */
#define	CIF_SPLICE		0
#define	CIF_UNLINK		1
125 
126 
127 /*
128  * CPR miscellaneous support routines
129  */
/*
 * cpr_open: vn_open() on a kernel-space path with 0600 permissions
 * and the CRCREAT flag.
 */
#define	cpr_open(path, mode,  vpp)	(vn_open(path, UIO_SYSSPACE, \
		mode, 0600, vpp, CRCREAT, 0))
/*
 * cpr_rdwr: vn_rdwr() on a kernel-space buffer at file offset 0, using
 * the caller's credentials and without returning a residual count.
 */
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp,  (caddr_t)(basep), \
		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
		(ssize_t *)NULL))
135 
136 /*
137  * definitions for saving/restoring prom pages
138  */
static void	*ppage_buf;
static pgcnt_t	ppage_count;
static pfn_t	*pphys_list;
static size_t	pphys_list_size;

/* tlb entry accessor: (tlb index, tte, va tag); see dtlb_wr_entry() */
typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
/* per-entry callback invoked by i_cpr_scan_tlb(): (index, tte, tag, cti) */
typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
146 
147 /*
148  * private struct for tlb handling
149  */
struct cpr_trans_info {
	sutlb_t		*dst;		/* next sutlb_t slot to fill */
	sutlb_t		*tail;		/* fill limit; dst must stay below it */
	tlb_rw_t	reader;		/* reads one tlb entry during a scan */
	tlb_rw_t	writer;		/* writes one tlb entry (used to clear) */
	tlb_filter_t	filter;		/* called for each valid scanned entry */
	int		index;		/* scan start / next target tlb index */
	uint64_t	skip;		/* assumes TLB <= 64 locked entries */
};
typedef struct cpr_trans_info cti_t;
160 
161 
162 /*
163  * special handling for tlb info
164  */
/* true when va lies strictly between OFW_START_ADDR and OFW_END_ADDR */
#define	WITHIN_OFW(va) \
	(((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))

/*
 * true when the MMU_PAGESIZE page starting at va fits entirely inside
 * the MMU_PAGESIZE4M region starting at base
 */
#define	WITHIN_NUCLEUS(va, base) \
	(((va) >= (base)) && \
	(((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))

/* true when enable_bigktsb is set and va is in [ktsb_base, +ktsb_sz) */
#define	IS_BIGKTSB(va) \
	(enable_bigktsb && \
	((va) >= (uint64_t)ktsb_base) && \
	((va) < (uint64_t)(ktsb_base + ktsb_sz)))
176 
177 
178 /*
179  * WARNING:
180  * the text from this file is linked to follow cpr_resume_setup.o;
181  * only add text between here and i_cpr_end_jumpback when it needs
182  * to be called during resume before we switch back to the kernel
183  * trap table.  all the text in this range must fit within a page.
184  */
185 
186 
187 /*
188  * each time a machine is reset, the prom uses an inconsistent set of phys
189  * pages and the cif cookie may differ as well.  so prior to restoring the
 * original prom, we have to use the new/tmp prom's translations
191  * when requesting prom services.
192  *
193  * cif_handler starts out as the original prom cookie, and that gets used
194  * by client_handler() to jump into the prom.  here we splice-in a wrapper
195  * routine by writing cif_handler; client_handler() will now jump to the
196  * wrapper which switches the %tba to the new/tmp prom's trap table then
197  * jumps to the new cookie.
198  */
void
i_cpr_cif_setup(int action)
{
	/* cookie storage and the wrapper entry point are external symbols */
	extern void *i_cpr_orig_cif, *cif_handler;
	extern int i_cpr_cif_wrapper(void *);

	/*
	 * save the original cookie and change the current cookie to the
	 * wrapper routine.  later we just restore the original cookie.
	 * any action other than CIF_SPLICE or CIF_UNLINK is a no-op.
	 */
	if (action == CIF_SPLICE) {
		i_cpr_orig_cif = cif_handler;
		cif_handler = (void *)i_cpr_cif_wrapper;
	} else if (action == CIF_UNLINK)
		cif_handler = i_cpr_orig_cif;
}
215 
216 
217 /*
218  * launch slave cpus into kernel text, pause them,
219  * and restore the original prom pages
220  */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	cpu_t *cp;

	/* remembered so kcontextreg can be put back on the way out */
	uint64_t kctx = kcontextreg;

	/*
	 * Do not allow setting page size codes in MMU primary context
	 * register while using cif wrapper. This is needed to work
	 * around OBP incorrect handling of this MMU register.
	 */
	kcontextreg = 0;

	/*
	 * reset cpu_ready_set so x_calls work properly
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		mutex_enter(&cpu_lock);
		/*
		 * All of the slave cpus are not ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * clear cpu_flags and mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		/* second pass: restart every cpu other than this one */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		pause_cpus(NULL);
		mutex_exit(&cpu_lock);

		/* run the tlb cleanup on the cpus in cpu_ready_set */
		i_cpr_xcall(i_cpr_clear_entries);
	} else
		/* single cpu: run the same tlb cleanup directly */
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);

	/* allow setting page size codes in MMU primary context register */
	kcontextreg = kctx;
}
292 
293 
294 /*
295  * end marker for jumpback page;
296  * this symbol is used to check the size of i_cpr_resume_setup()
297  * and the above text.  For simplicity, the Makefile needs to
298  * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
299  */
void
i_cpr_end_jumpback(void)
{
	/*
	 * intentionally empty: i_cpr_save_machdep_info() uses this
	 * function's address as the end of the jumpback text.
	 */
}
304 
305 
306 /*
307  * scan tlb entries with reader; when valid entries are found,
308  * the filter routine will selectively save/clear them
309  */
310 static void
311 i_cpr_scan_tlb(cti_t *ctip)
312 {
313 	uint64_t va_tag;
314 	int tlb_index;
315 	tte_t tte;
316 
317 	for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
318 		(*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
319 		if (va_tag && TTE_IS_VALID(&tte))
320 			(*ctip->filter)(tlb_index, &tte, va_tag, ctip);
321 	}
322 }
323 
324 
325 /*
326  * filter for locked tlb entries that reference the text/data nucleus
327  * and any bigktsb's; these will be reinstalled by cprboot on all cpus
328  */
329 /* ARGSUSED */
330 static void
331 i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
332 {
333 	cti_t *ctip;
334 
335 	/*
336 	 * record tlb data at ctip->dst; the target tlb index starts
337 	 * at the highest tlb offset and moves towards 0.  the prom
338 	 * reserves both dtlb and itlb index 0.  any selected entry
339 	 * also gets marked to prevent being flushed during resume
340 	 */
341 	if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
342 	    va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
343 		ctip = ctrans;
344 		while ((1 << ctip->index) & ctip->skip)
345 			ctip->index--;
346 		ASSERT(ctip->index > 0);
347 		ASSERT(ctip->dst < ctip->tail);
348 		ctip->dst->tte.ll = ttep->ll;
349 		ctip->dst->va_tag = va_tag;
350 		ctip->dst->index = ctip->index--;
351 		ctip->dst->tmp = 0;
352 		ctip->dst++;
353 	}
354 }
355 
356 
357 /*
358  * some tlb entries are stale, filter for unlocked entries
359  * within the prom virt range and clear them
360  */
361 static void
362 i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
363 {
364 	sutlb_t clr;
365 	cti_t *ctip;
366 
367 	if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
368 		ctip = ctrans;
369 		bzero(&clr, sizeof (clr));
370 		(*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
371 	}
372 }
373 
374 
375 /*
376  * some of the entries installed by cprboot are needed only on a
377  * short-term basis and need to be flushed to avoid clogging the tlbs.
378  * scan the dtte/itte arrays for items marked as temporary and clear
379  * dtlb/itlb entries using wrfunc.
380  */
381 static void
382 i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
383 {
384 	sutlb_t clr, *tail;
385 
386 	bzero(&clr, sizeof (clr));
387 	for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
388 		if (listp->tmp)
389 			(*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
390 	}
391 }
392 
393 
394 /* ARGSUSED */
static void
i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
{
	extern void demap_all(void);
	cti_t cti;

	/*
	 * arg1/arg2 are unused; the two-argument shape lets this routine
	 * be handed to i_cpr_xcall() as well as called directly.
	 * first drop the entries recorded in m_info that are marked tmp.
	 */
	i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
	i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);

	/*
	 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
	 * a second label for vtag_flushall.  the call is made using
	 * vtag_flushall() instead of demap_all() due to runtime and
	 * krtld results with both older and newer cpu modules.
	 */
	if (&demap_all != 0) {
		vtag_flushall();
		return;
	}

	/*
	 * for older V9 cpus, scan tlbs and clear stale entries
	 */
	bzero(&cti, sizeof (cti));
	cti.filter = i_cpr_ufw;

	/* dtlb pass: start at the last entry of this cpu's dtlb */
	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
	cti.reader = dtlb_rd_entry;
	cti.writer = dtlb_wr_entry;
	i_cpr_scan_tlb(&cti);

	/* itlb pass: same scan with the itlb accessors */
	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
	cti.reader = itlb_rd_entry;
	cti.writer = itlb_wr_entry;
	i_cpr_scan_tlb(&cti);
}
431 
432 
433 /*
434  * craft tlb info for tmp use during resume; this data gets used by
435  * cprboot to install tlb entries.  we also mark each struct as tmp
436  * so those tlb entries will get flushed after switching to the kernel
437  * trap table.  no data needs to be recorded for vaddr when it falls
438  * within the nucleus since we've already recorded nucleus ttes and
439  * a 8K tte would conflict with a 4MB tte.  eg: the cpr module
440  * text/data may have been loaded into the text/data nucleus.
441  */
442 static void
443 i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
444 {
445 	pfn_t ppn;
446 	uint_t rw;
447 
448 	if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
449 		return;
450 
451 	while ((1 << ctip->index) & ctip->skip)
452 		ctip->index--;
453 	ASSERT(ctip->index > 0);
454 	ASSERT(ctip->dst < ctip->tail);
455 
456 	/*
457 	 * without any global service available to lookup
458 	 * a tte by vaddr, we craft our own here:
459 	 */
460 	ppn = va_to_pfn(vaddr);
461 	rw = (nbase == datava) ? TTE_HWWR_INT : 0;
462 	ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
463 	ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
464 	    TTE_CP_INT | TTE_PRIV_INT | rw;
465 	ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
466 	ctip->dst->index = ctip->index--;
467 	ctip->dst->tmp = 1;
468 	ctip->dst++;
469 }
470 
471 
472 static void
473 i_cpr_xcall(xcfunc_t func)
474 {
475 	uint_t pil, reset_pil;
476 
477 	pil = getpil();
478 	if (pil < XCALL_PIL)
479 		reset_pil = 0;
480 	else {
481 		reset_pil = 1;
482 		setpil(XCALL_PIL - 1);
483 	}
484 	xc_some(cpu_ready_set, func, 0, 0);
485 	if (reset_pil)
486 		setpil(pil);
487 }
488 
489 
490 /*
491  * restart paused slave cpus
492  */
493 void
494 i_cpr_machdep_setup(void)
495 {
496 	if (ncpus > 1) {
497 		CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
498 		mutex_enter(&cpu_lock);
499 		start_cpus();
500 		mutex_exit(&cpu_lock);
501 	}
502 }
503 
504 
505 /*
506  * Stop all interrupt activities in the system
507  */
void
i_cpr_stop_intr(void)
{
	/* the value returned by spl7() is deliberately discarded */
	(void) spl7();
}
513 
514 /*
515  * Set machine up to take interrupts
516  */
void
i_cpr_enable_intr(void)
{
	/* the value returned by spl0() is deliberately discarded */
	(void) spl0();
}
522 
523 
524 /*
525  * record cpu nodes and ids
526  */
527 static void
528 i_cpr_save_cpu_info(void)
529 {
530 	struct sun4u_cpu_info *scip;
531 	cpu_t *cp;
532 
533 	scip = m_info.sci;
534 	cp = CPU;
535 	do {
536 		ASSERT(scip < &m_info.sci[NCPU]);
537 		scip->cpu_id = cp->cpu_id;
538 		scip->node = cpunodes[cp->cpu_id].nodeid;
539 		scip++;
540 	} while ((cp = cp->cpu_next) != CPU);
541 }
542 
543 
544 /*
545  * Write necessary machine dependent information to cpr state file,
546  * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
547  */
548 int
549 i_cpr_write_machdep(vnode_t *vp)
550 {
551 	extern uint_t getpstate(), getwstate();
552 	extern uint_t i_cpr_tstack_size;
553 	const char ustr[] = ": unix-tte 2drop false ;";
554 	uintptr_t tinfo;
555 	label_t *ltp;
556 	cmd_t cmach;
557 	char *fmt;
558 	int rc;
559 
560 	/*
561 	 * ustr[] is used as temporary forth words during
562 	 * slave startup sequence, see sfmmu_mp_startup()
563 	 */
564 
565 	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
566 	cmach.md_size = sizeof (m_info) + sizeof (ustr);
567 
568 	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
569 		cpr_err(CE_WARN, "Failed to write descriptor.");
570 		return (rc);
571 	}
572 
573 	/*
574 	 * m_info is now cleared in i_cpr_dump_setup()
575 	 */
576 	m_info.ksb = (uint32_t)STACK_BIAS;
577 	m_info.kpstate = (uint16_t)getpstate();
578 	m_info.kwstate = (uint16_t)getwstate();
579 	CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
580 	    m_info.ksb, m_info.kpstate, m_info.kwstate);
581 
582 	ltp = &ttolwp(curthread)->lwp_qsav;
583 	m_info.qsav_pc = (cpr_ext)ltp->val[0];
584 	m_info.qsav_sp = (cpr_ext)ltp->val[1];
585 
586 	/*
587 	 * Set secondary context to INVALID_CONTEXT to force the HAT
588 	 * to re-setup the MMU registers and locked TTEs it needs for
589 	 * TLB miss handling.
590 	 */
591 	m_info.mmu_ctx_sec = INVALID_CONTEXT;
592 	m_info.mmu_ctx_pri = KCONTEXT;
593 
594 	tinfo = (uintptr_t)curthread;
595 	m_info.thrp = (cpr_ptr)tinfo;
596 
597 	tinfo = (uintptr_t)i_cpr_resume_setup;
598 	m_info.func = (cpr_ptr)tinfo;
599 
600 	/*
601 	 * i_cpr_data_page is comprised of a 4K stack area and a few
602 	 * trailing data symbols; the page is shared by the prom and
603 	 * kernel during resume.  the stack size is recorded here
604 	 * and used by cprboot to set %sp
605 	 */
606 	tinfo = (uintptr_t)&i_cpr_data_page;
607 	m_info.tmp_stack = (cpr_ptr)tinfo;
608 	m_info.tmp_stacksize = i_cpr_tstack_size;
609 
610 	m_info.test_mode = cpr_test_mode;
611 
612 	i_cpr_save_cpu_info();
613 
614 	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
615 		cpr_err(CE_WARN, "Failed to write machdep info.");
616 		return (rc);
617 	}
618 
619 	fmt = "error writing %s forth info";
620 	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
621 		cpr_err(CE_WARN, fmt, "unix-tte");
622 
623 	return (rc);
624 }
625 
626 
627 /*
628  * Save miscellaneous information which needs to be written to the
629  * state file.  This information is required to re-initialize
630  * kernel/prom handshaking.
631  */
632 void
633 i_cpr_save_machdep_info(void)
634 {
635 	CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
636 	    (uintptr_t)&i_cpr_end_jumpback -
637 	    (uintptr_t)i_cpr_resume_setup);
638 
639 	/*
640 	 * Verify the jumpback code all falls in one page.
641 	 */
642 	if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
643 	    ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
644 		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
645 }
646 
647 
648 /*
649  * cpu0 should contain bootcpu info
650  */
cpu_t *
i_cpr_bootcpu(void)
{
	/* always the address of the global cpu0 structure */
	return (&cpu0);
}
656 
processorid_t
i_cpr_bootcpuid(void)
{
	/* the boot cpu id is reported as the constant 0 */
	return (0);
}
662 
663 /*
664  * Return the virtual address of the mapping area
665  */
666 caddr_t
667 i_cpr_map_setup(void)
668 {
669 	/*
670 	 * Allocate a virtual memory range spanned by an hmeblk.
671 	 * This would be 8 hments or 64k bytes.  Starting VA
672 	 * must be 64k (8-page) aligned.
673 	 */
674 	cpr_vaddr = vmem_xalloc(heap_arena,
675 	    mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
676 	    0, 0, NULL, NULL, VM_NOSLEEP);
677 	return (cpr_vaddr);
678 }
679 
680 /*
681  * create tmp locked tlb entries for a group of phys pages;
682  *
683  * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
684  * otherwise would fill up a tlb with locked entries
685  */
686 void
687 i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
688 {
689 	tte_t tte;
690 	extern pfn_t curthreadpfn;
691 	extern int curthreadremapped;
692 
693 	curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
694 
695 	for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
696 		tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
697 		tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
698 		    TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
699 		sfmmu_dtlb_ld_kva(vaddr, &tte);
700 	}
701 }
702 
703 void
704 i_cpr_mapout(caddr_t vaddr, uint_t pages)
705 {
706 	extern int curthreadremapped;
707 
708 	if (curthreadremapped && vaddr <= (caddr_t)curthread &&
709 	    (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
710 		curthreadremapped = 0;
711 
712 	for (; pages--; vaddr += MMU_PAGESIZE)
713 		vtag_flushpage(vaddr, (uint64_t)ksfmmup);
714 }
715 
716 /*
717  * We're done using the mapping area; release virtual space
718  */
719 void
720 i_cpr_map_destroy(void)
721 {
722 	vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
723 	cpr_vaddr = NULL;
724 }
725 
726 /* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* no work is done in this implementation; flag is ignored */
}
731 
732 
733 /*
734  * This function takes care of pages which are not in kas or need to be
735  * taken care of in a special way.  For example, panicbuf pages are not
736  * in kas and their pages are allocated via prom_retain().
737  */
738 pgcnt_t
739 i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
740 {
741 	struct cpr_map_info *pri, *tail;
742 	pgcnt_t pages, total = 0;
743 	pfn_t pfn;
744 
745 	/*
746 	 * Save information about prom retained panicbuf pages
747 	 */
748 	if (bitfunc == cpr_setbit) {
749 		pri = &cpr_prom_retain[CPR_PANICBUF];
750 		pri->virt = (cpr_ptr)panicbuf;
751 		pri->phys = va_to_pa(panicbuf);
752 		pri->size = sizeof (panicbuf);
753 	}
754 
755 	/*
756 	 * Go through the prom_retain array to tag those pages.
757 	 */
758 	tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
759 	for (pri = cpr_prom_retain; pri < tail; pri++) {
760 		pages = mmu_btopr(pri->size);
761 		for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
762 			if (pf_is_memory(pfn)) {
763 				if (bitfunc == cpr_setbit) {
764 					if ((*bitfunc)(pfn, mapflag) == 0)
765 						total++;
766 				} else
767 					total++;
768 			}
769 		}
770 	}
771 
772 	return (total);
773 }
774 
775 
776 /*
777  * Free up memory-related resources here.  We start by freeing buffers
778  * allocated during suspend initialization.  Also, free up the mapping
779  * resources allocated in cpr_init().
780  */
void
i_cpr_free_memory_resources(void)
{
	/* result of the CPR_PROM_FREE request is deliberately ignored */
	(void) i_cpr_prom_pages(CPR_PROM_FREE);
	/* release the mapping area set up by i_cpr_map_setup() */
	i_cpr_map_destroy();
	/* release the sensitive-page descriptors and data storage */
	i_cpr_storage_free();
}
788 
789 
790 /*
791  * Derived from cpr_write_statefile().
792  * Save the sensitive pages to the storage area and do bookkeeping
793  * using the sensitive descriptors. Each descriptor will contain no more
794  * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
795  * of pages that statefile gets written to disk at each write.
796  * XXX The CPR_MAXCONTIG can be changed to the size of the compression
797  * scratch area.
798  */
799 static int
800 i_cpr_save_to_storage(void)
801 {
802 	sensitive_size_saved = 0;
803 	sensitive_pages_saved = 0;
804 	sensitive_write_ptr = i_cpr_storage_data_base;
805 	return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
806 }
807 
808 
809 /*
810  * This routine allocates space to save the sensitive kernel pages,
811  * i.e. kernel data nucleus, kvalloc and kvseg segments.
812  * It's assumed that those segments are the only areas that can be
813  * contaminated by memory allocations during statefile dumping.
814  * The space allocated here contains:
815  * 	A list of descriptors describing the saved sensitive pages.
816  * 	The storage area for saving the compressed sensitive kernel pages.
817  * Since storage pages are allocated from segkmem, they need to be
818  * excluded when saving.
819  */
int
i_cpr_save_sensitive_kpages(void)
{
	static const char pages_fmt[] = "\n%s %s allocs\n"
	    "	spages %ld, vpages %ld, diff %ld\n";
	int retry_cnt;
	int error = 0;
	pgcnt_t pages, spages, vpages;
	caddr_t	addr;
	char *str;

	/*
	 * Returns 0 once a save pass succeeds.  Returns ENOMEM when the
	 * data storage cannot be allocated; a descriptor allocation
	 * failure is returned as-is.  If every pass fails, the error
	 * from the last pass is returned, with -1 (too few descriptors)
	 * converted to ENOMEM.
	 */

	/*
	 * Tag sensitive kpages. Allocate space for storage descriptors
	 * and storage data area based on the resulting bitmaps.
	 * Note: The storage space will be part of the sensitive
	 * segment, so we need to tag kpages here before the storage
	 * is actually allocated just so their space won't be accounted
	 * for. They will not be part of the statefile although those
	 * pages will be claimed by cprboot.
	 */
	cpr_clear_bitmaps();

	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
	pages = spages - vpages;

	str = "i_cpr_save_sensitive_kpages:";
	CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);

	/*
	 * Allocate space to save the clean sensitive kpages
	 */
	for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
		/*
		 * Alloc on first pass or realloc if we are retrying because
		 * of insufficient storage for sensitive pages
		 */
		if (retry_cnt == 0 || error == ENOMEM) {
			/* drop any storage left over from an earlier pass */
			if (i_cpr_storage_data_base) {
				kmem_free(i_cpr_storage_data_base,
				    mmu_ptob(i_cpr_storage_data_sz));
				i_cpr_storage_data_base = NULL;
				i_cpr_storage_data_sz = 0;
			}
			addr = i_cpr_storage_data_alloc(pages,
			    &i_cpr_storage_data_sz, retry_cnt);
			if (addr == NULL) {
				CPR_DEBUG(CPR_DEBUG7,
				    "\n%s can't allocate data storage space!\n",
				    str);
				return (ENOMEM);
			}
			i_cpr_storage_data_base = addr;
			i_cpr_storage_data_end =
			    addr + mmu_ptob(i_cpr_storage_data_sz);
		}

		/*
		 * Allocate on first pass, only realloc if retry is because of
		 * insufficient descriptors, but reset contents on each pass
		 * (desc_alloc resets contents as well)
		 */
		if (retry_cnt == 0 || error == -1) {
			error = i_cpr_storage_desc_alloc(
			    &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
			    &i_cpr_storage_desc_end, retry_cnt);
			if (error != 0)
				return (error);
		} else {
			i_cpr_storage_desc_init(i_cpr_storage_desc_base,
			    i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
		}

		/*
		 * We are ready to save the sensitive kpages to storage.
		 * We cannot trust what's tagged in the bitmaps anymore
		 * after storage allocations.  Clear up the bitmaps and
		 * retag the sensitive kpages again.  The storage pages
		 * should be untagged.
		 */
		cpr_clear_bitmaps();

		spages =
		    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
		vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);

		CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
		    spages, vpages, spages - vpages);

		/*
		 * Returns 0 on success, -1 if too few descriptors, and
		 * ENOMEM if not enough space to save sensitive pages
		 */
		CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
		error = i_cpr_save_to_storage();
		if (error == 0) {
			/* Saving to storage succeeded */
			CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
			    sensitive_pages_saved);
			break;
		} else if (error == -1)
			CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
	}
	/* callers see an errno value, never the internal -1 */
	if (error == -1)
		error = ENOMEM;
	return (error);
}
927 
928 
929 /*
930  * Estimate how much memory we will need to save
931  * the sensitive pages with compression.
932  */
933 static caddr_t
934 i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
935 {
936 	pgcnt_t alloc_pcnt, last_pcnt;
937 	caddr_t addr;
938 	char *str;
939 
940 	str = "i_cpr_storage_data_alloc:";
941 	if (retry_cnt == 0) {
942 		/*
943 		 * common compression ratio is about 3:1
944 		 * initial storage allocation is estimated at 40%
945 		 * to cover the majority of cases
946 		 */
947 		alloc_pcnt = INITIAL_ALLOC_PCNT;
948 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
949 		CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
950 		CPR_DEBUG(CPR_DEBUG7,
951 		    "%s initial est pages: %ld, alloc %ld%%\n",
952 		    str, *alloc_pages, alloc_pcnt);
953 	} else {
954 		/*
955 		 * calculate the prior compression percentage (x100)
956 		 * from the last attempt to save sensitive pages
957 		 */
958 		ASSERT(sensitive_pages_saved != 0);
959 		last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
960 		    sensitive_pages_saved;
961 		CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);
962 
963 		/*
964 		 * new estimated storage size is based on
965 		 * the larger ratio + 5% for each retry:
966 		 * pages * (last + [5%, 10%])
967 		 */
968 		alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
969 		    (retry_cnt * 5);
970 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
971 		CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
972 		    str, *alloc_pages, alloc_pcnt);
973 	}
974 
975 	addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
976 	CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
977 	return (addr);
978 }
979 
980 
981 void
982 i_cpr_storage_free(void)
983 {
984 	/* Free descriptors */
985 	if (i_cpr_storage_desc_base) {
986 		kmem_free(i_cpr_storage_desc_base,
987 		    mmu_ptob(i_cpr_storage_desc_pgcnt));
988 		i_cpr_storage_desc_base = NULL;
989 		i_cpr_storage_desc_pgcnt = 0;
990 	}
991 
992 
993 	/* Data storage */
994 	if (i_cpr_storage_data_base) {
995 		kmem_free(i_cpr_storage_data_base,
996 		    mmu_ptob(i_cpr_storage_data_sz));
997 		i_cpr_storage_data_base = NULL;
998 		i_cpr_storage_data_sz = 0;
999 	}
1000 }
1001 
1002 
1003 /*
1004  * This routine is derived from cpr_compress_and_write().
1005  * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1006  * 2. Compress and save the clean sensitive pages into the storage area.
1007  */
1008 int
1009 i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
1010 {
1011 	extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
1012 	extern caddr_t i_cpr_storage_data_end;
1013 	uint_t remaining, datalen;
1014 	uint32_t test_usum;
1015 	char *datap;
1016 	csd_t *descp;
1017 	cpd_t cpd;
1018 	int error;
1019 
1020 	/*
1021 	 * Fill next empty storage descriptor
1022 	 */
1023 	descp = i_cpr_storage_desc_base + chunks - 1;
1024 	if (descp >= i_cpr_storage_desc_end) {
1025 		CPR_DEBUG(CPR_DEBUG1, "ran out of descriptors, base 0x%p, "
1026 		    "chunks %d, end 0x%p, descp 0x%p\n",
1027 		    (void *)i_cpr_storage_desc_base, chunks,
1028 		    (void *)i_cpr_storage_desc_end, (void *)descp);
1029 		return (-1);
1030 	}
1031 	ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
1032 	i_cpr_storage_desc_last_used = descp;
1033 
1034 	descp->csd_dirty_spfn = spfn;
1035 	descp->csd_dirty_npages = pages;
1036 
1037 	i_cpr_mapin(CPR->c_mapping_area, pages, spfn);
1038 
1039 	/*
1040 	 * try compressing pages and copy cpd fields
1041 	 * pfn is copied for debug use
1042 	 */
1043 	cpd.cpd_pfn = spfn;
1044 	datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
1045 	datalen = cpd.cpd_length;
1046 	descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
1047 #ifdef DEBUG
1048 	descp->csd_usum = cpd.cpd_usum;
1049 	descp->csd_csum = cpd.cpd_csum;
1050 #endif
1051 
1052 	error = 0;
1053 
1054 	/*
1055 	 * Save the raw or compressed data to the storage area pointed to by
1056 	 * sensitive_write_ptr. Make sure the storage space is big enough to
1057 	 * hold the result. Otherwise roll back to increase the storage space.
1058 	 */
1059 	descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
1060 	descp->csd_clean_sz = datalen;
1061 	if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
1062 		extern	void cprbcopy(void *, void *, size_t);
1063 
1064 		cprbcopy(datap, sensitive_write_ptr, datalen);
1065 		sensitive_size_saved += datalen;
1066 		sensitive_pages_saved += descp->csd_dirty_npages;
1067 		sensitive_write_ptr += datalen;
1068 	} else {
1069 		remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
1070 		CPR_DEBUG(CPR_DEBUG1, "i_cpr_compress_and_save: The storage "
1071 		    "space is too small!\ngot %d, want %d\n\n",
1072 		    remaining, (remaining + datalen));
1073 #ifdef	DEBUG
1074 		/*
1075 		 * Check to see if the content of the sensitive pages that we
1076 		 * just copied have changed during this small time window.
1077 		 */
1078 		test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
1079 		descp->csd_usum = cpd.cpd_usum;
1080 		if (test_usum != descp->csd_usum) {
1081 			CPR_DEBUG(CPR_DEBUG1, "\nWARNING: "
1082 			    "i_cpr_compress_and_save: "
1083 			    "Data in the range of pfn 0x%lx to pfn "
1084 			    "0x%lx has changed after they are saved "
1085 			    "into storage.", spfn, (spfn + pages - 1));
1086 		}
1087 #endif
1088 		error = ENOMEM;
1089 	}
1090 
1091 	i_cpr_mapout(CPR->c_mapping_area, pages);
1092 	return (error);
1093 }
1094 
1095 
1096 /*
1097  * This routine is derived from cpr_count_kpages().
1098  * It goes through kernel data nucleus and segkmem segments to select
1099  * pages in use and mark them in the corresponding bitmap.
1100  */
1101 pgcnt_t
1102 i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
1103 {
1104 	pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1105 	extern caddr_t e_moddata;
1106 	extern struct seg kvalloc;
1107 	extern struct seg kmem64;
1108 	size_t size;
1109 
1110 	/*
1111 	 * Kernel data nucleus pages
1112 	 */
1113 	size = e_moddata - s_data;
1114 	kdata_cnt += cpr_count_pages(s_data, size,
1115 	    mapflag, bitfunc, DBG_SHOWRANGE);
1116 
1117 	/*
1118 	 * kvseg and kvalloc pages
1119 	 */
1120 	segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1121 	segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1122 	    mapflag, bitfunc, DBG_SHOWRANGE);
1123 
1124 	/* segment to support kernel memory usage above 32-bit space (4GB) */
1125 	if (kmem64.s_base)
1126 		segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1127 		    mapflag, bitfunc, DBG_SHOWRANGE);
1128 
1129 	CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_count_sensitive_kpages:\n"
1130 	    "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1131 	    kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt);
1132 
1133 	return (kdata_cnt + segkmem_cnt);
1134 }
1135 
1136 
1137 pgcnt_t
1138 i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1139 {
1140 	pgcnt_t count = 0;
1141 
1142 	if (i_cpr_storage_desc_base) {
1143 		count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1144 		    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1145 		    mapflag, bitfunc, DBG_SHOWRANGE);
1146 	}
1147 	if (i_cpr_storage_data_base) {
1148 		count += cpr_count_pages(i_cpr_storage_data_base,
1149 		    (size_t)mmu_ptob(i_cpr_storage_data_sz),
1150 		    mapflag, bitfunc, DBG_SHOWRANGE);
1151 	}
1152 	return (count);
1153 }
1154 
1155 
1156 /*
1157  * Derived from cpr_write_statefile().
1158  * Allocate (or reallocate after exhausting the supply) descriptors for each
1159  * chunk of contiguous sensitive kpages.
1160  */
1161 static int
1162 i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1163     int retry)
1164 {
1165 	pgcnt_t npages;
1166 	int chunks;
1167 	csd_t	*descp, *end;
1168 	size_t	len;
1169 	char *str = "i_cpr_storage_desc_alloc:";
1170 
1171 	/*
1172 	 * On initial allocation, add some extra to cover overhead caused
1173 	 * by the allocation for the storage area later.
1174 	 */
1175 	if (retry == 0) {
1176 		chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1177 		    EXTRA_DESCS;
1178 		npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1179 		CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
1180 	} else {
1181 		CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
1182 		npages = *pgsp + 1;
1183 	}
1184 	/* Free old descriptors, if any */
1185 	if (*basepp)
1186 		kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1187 
1188 	descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1189 	if (descp == NULL) {
1190 		CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
1191 		return (ENOMEM);
1192 	}
1193 
1194 	*pgsp = npages;
1195 	len = mmu_ptob(npages);
1196 	end = *endpp = descp + (len / (sizeof (**basepp)));
1197 	CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1198 	    "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1199 	    (void *)*basepp, (void *)*endpp);
1200 	i_cpr_storage_desc_init(descp, npages, end);
1201 	return (0);
1202 }
1203 
1204 static void
1205 i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1206 {
1207 	size_t	len = mmu_ptob(npages);
1208 
1209 	/* Initialize the descriptors to something impossible. */
1210 	bzero(descp, len);
1211 #ifdef	DEBUG
1212 	/*
1213 	 * This condition is tested by an ASSERT
1214 	 */
1215 	for (; descp < end; descp++)
1216 		descp->csd_dirty_spfn = (uint_t)-1;
1217 #endif
1218 }
1219 
1220 int
1221 i_cpr_dump_sensitive_kpages(vnode_t *vp)
1222 {
1223 	int	error = 0;
1224 	uint_t	spin_cnt = 0;
1225 	csd_t	*descp;
1226 
1227 	/*
1228 	 * These following two variables need to be reinitialized
1229 	 * for each cpr cycle.
1230 	 */
1231 	i_cpr_sensitive_bytes_dumped = 0;
1232 	i_cpr_sensitive_pgs_dumped = 0;
1233 
1234 	if (i_cpr_storage_desc_base) {
1235 		for (descp = i_cpr_storage_desc_base;
1236 		    descp <= i_cpr_storage_desc_last_used; descp++) {
1237 			if (error = cpr_dump_sensitive(vp, descp))
1238 				return (error);
1239 			spin_cnt++;
1240 			if ((spin_cnt & 0x5F) == 1)
1241 				cpr_spinning_bar();
1242 		}
1243 		prom_printf(" \b");
1244 	}
1245 
1246 	CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1247 	    i_cpr_sensitive_pgs_dumped);
1248 	return (0);
1249 }
1250 
1251 
1252 /*
1253  * 1. Fill the cpr page descriptor with the info of the dirty pages
1254  *    and
1255  *    write the descriptor out. It will be used at resume.
1256  * 2. Write the clean data in stead of the dirty data out.
1257  *    Note: to save space, the clean data is already compressed.
1258  */
1259 static int
1260 cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1261 {
1262 	int error = 0;
1263 	caddr_t datap;
1264 	cpd_t cpd;	/* cpr page descriptor */
1265 	pfn_t	dirty_spfn;
1266 	pgcnt_t dirty_npages;
1267 	size_t clean_sz;
1268 	caddr_t	clean_sva;
1269 	int	clean_compressed;
1270 	extern uchar_t cpr_pagecopy[];
1271 
1272 	dirty_spfn = descp->csd_dirty_spfn;
1273 	dirty_npages = descp->csd_dirty_npages;
1274 	clean_sva = (caddr_t)descp->csd_clean_sva;
1275 	clean_sz = descp->csd_clean_sz;
1276 	clean_compressed = descp->csd_clean_compressed;
1277 
1278 	/* Fill cpr page descriptor. */
1279 	cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1280 	cpd.cpd_pfn = dirty_spfn;
1281 	cpd.cpd_flag = 0;  /* must init to zero */
1282 	cpd.cpd_pages = dirty_npages;
1283 
1284 #ifdef	DEBUG
1285 	if ((cpd.cpd_usum = descp->csd_usum) != 0)
1286 		cpd.cpd_flag |= CPD_USUM;
1287 	if ((cpd.cpd_csum = descp->csd_csum) != 0)
1288 		cpd.cpd_flag |= CPD_CSUM;
1289 #endif
1290 
1291 	STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1292 
1293 	/*
1294 	 * The sensitive kpages are usually saved with compression
1295 	 * unless compression could not reduce the size of the data.
1296 	 * If user choose not to have the statefile compressed,
1297 	 * we need to decompress the data back before dumping it to disk.
1298 	 */
1299 	if (CPR->c_flags & C_COMPRESSING) {
1300 		cpd.cpd_length = clean_sz;
1301 		datap = clean_sva;
1302 		if (clean_compressed)
1303 			cpd.cpd_flag |= CPD_COMPRESS;
1304 	} else {
1305 		if (clean_compressed) {
1306 			cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1307 			    clean_sz, mmu_ptob(dirty_npages));
1308 			datap = (caddr_t)cpr_pagecopy;
1309 			ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1310 		} else {
1311 			cpd.cpd_length = clean_sz;
1312 			datap = clean_sva;
1313 		}
1314 		cpd.cpd_csum = 0;
1315 	}
1316 
1317 	/* Write cpr page descriptor */
1318 	error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1319 	if (error) {
1320 		CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1321 #ifdef DEBUG
1322 		debug_enter("cpr_dump_sensitive: cpr_write() page "
1323 		    "descriptor failed!\n");
1324 #endif
1325 		return (error);
1326 	}
1327 
1328 	i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1329 
1330 	/* Write page data */
1331 	error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1332 	if (error) {
1333 		CPR_DEBUG(CPR_DEBUG7, "error: %x\n", error);
1334 		CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1335 		CPR_DEBUG(CPR_DEBUG7, "cpr_write(%p, %p , %lx)\n",
1336 		    (void *)vp, (void *)datap, cpd.cpd_length);
1337 #ifdef DEBUG
1338 		debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1339 #endif
1340 		return (error);
1341 	}
1342 
1343 	i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1344 	i_cpr_sensitive_pgs_dumped += dirty_npages;
1345 
1346 	return (error);
1347 }
1348 
1349 
1350 /*
1351  * Sanity check to make sure that we have dumped right amount
1352  * of pages from different sources to statefile.
1353  */
1354 int
1355 i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1356 {
1357 	uint_t total_pgs_dumped;
1358 
1359 	total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1360 
1361 	CPR_DEBUG(CPR_DEBUG7, "\ncheck_pgs: reg %d + sens %ld = %d, "
1362 	    "expect %d\n\n", regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1363 	    total_pgs_dumped, pgs_expected);
1364 
1365 	if (pgs_expected == total_pgs_dumped)
1366 		return (0);
1367 
1368 	return (EINVAL);
1369 }
1370 
1371 
1372 int
1373 i_cpr_reusefini(void)
1374 {
1375 	struct vnode *vp;
1376 	cdef_t *cdef;
1377 	size_t size;
1378 	char *bufp;
1379 	int rc;
1380 
1381 	if (cpr_reusable_mode)
1382 		cpr_reusable_mode = 0;
1383 
1384 	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
1385 		if (rc == EROFS) {
1386 			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
1387 			    "(uadmin %d %d)\nmust be done with / mounted "
1388 			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
1389 		}
1390 		return (rc);
1391 	}
1392 
1393 	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
1394 	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));
1395 
1396 	if (rc) {
1397 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1398 		    cpr_default_path, rc);
1399 	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
1400 		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
1401 		    "prom values for %s", cpr_default_path,
1402 		    cpr_enumerate_promprops(&bufp, &size));
1403 		kmem_free(bufp, size);
1404 		rc = EINVAL;
1405 	} else {
1406 		/*
1407 		 * clean up prom properties
1408 		 */
1409 		rc = cpr_update_nvram(cdef->props);
1410 		if (rc == 0) {
1411 			/*
1412 			 * invalidate the disk copy and turn off reusable
1413 			 */
1414 			cdef->mini.magic = 0;
1415 			cdef->mini.reusable = 0;
1416 			if (rc = cpr_rdwr(UIO_WRITE, vp,
1417 			    &cdef->mini, sizeof (cdef->mini))) {
1418 				cpr_err(CE_WARN, "Failed writing %s, errno %d",
1419 				    cpr_default_path, rc);
1420 			}
1421 		}
1422 	}
1423 
1424 	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
1425 	VN_RELE(vp);
1426 	kmem_free(cdef, sizeof (*cdef));
1427 
1428 	return (rc);
1429 }
1430 
1431 
1432 int
1433 i_cpr_reuseinit(void)
1434 {
1435 	int rc = 0;
1436 
1437 	if (rc = cpr_default_setup(1))
1438 		return (rc);
1439 
1440 	/*
1441 	 * We need to validate default file
1442 	 */
1443 	rc = cpr_validate_definfo(1);
1444 	if (rc == 0)
1445 		cpr_reusable_mode = 1;
1446 	else if (rc == EROFS) {
1447 		cpr_err(CE_NOTE, "reuseinit must be performed "
1448 		    "while / is mounted writeable");
1449 	}
1450 
1451 	(void) cpr_default_setup(0);
1452 
1453 	return (rc);
1454 }
1455 
1456 
1457 int
1458 i_cpr_check_cprinfo(void)
1459 {
1460 	struct vnode *vp;
1461 	cmini_t mini;
1462 	int rc = 0;
1463 
1464 	if (rc = cpr_open_deffile(FREAD, &vp)) {
1465 		if (rc == ENOENT)
1466 			cpr_err(CE_NOTE, "cprinfo file does not "
1467 			    "exist.  You must run 'uadmin %d %d' "
1468 			    "command while / is mounted writeable,\n"
1469 			    "then reboot and run 'uadmin %d %d' "
1470 			    "to create a reusable statefile",
1471 			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1472 		return (rc);
1473 	}
1474 
1475 	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1476 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
1477 	VN_RELE(vp);
1478 
1479 	if (rc) {
1480 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1481 		    cpr_default_path, rc);
1482 	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
1483 		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1484 		    "You must run 'uadmin %d %d' while / is mounted "
1485 		    "writeable, then reboot and run 'uadmin %d %d' "
1486 		    "to create a reusable statefile\n",
1487 		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1488 		rc = EINVAL;
1489 	}
1490 
1491 	return (rc);
1492 }
1493 
1494 
/*
 * Report whether reusable statefiles are supported here.
 * This implementation always answers yes (1).
 */
int
i_cpr_reusable_supported(void)
{
	const int supported = 1;

	return (supported);
}
1500 
1501 
1502 /*
1503  * find prom phys pages and alloc space for a tmp copy
1504  */
1505 static int
1506 i_cpr_find_ppages(void)
1507 {
1508 	extern struct vnode prom_ppages;
1509 	struct page *pp;
1510 	struct memlist *pmem;
1511 	pgcnt_t npages, pcnt, scnt, vcnt;
1512 	pfn_t ppn, plast, *dst;
1513 	int mapflag;
1514 
1515 	cpr_clear_bitmaps();
1516 	mapflag = REGULAR_BITMAP;
1517 
1518 	/*
1519 	 * there should be a page_t for each phys page used by the kernel;
1520 	 * set a bit for each phys page not tracked by a page_t
1521 	 */
1522 	pcnt = 0;
1523 	memlist_read_lock();
1524 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1525 		npages = mmu_btop(pmem->size);
1526 		ppn = mmu_btop(pmem->address);
1527 		for (plast = ppn + npages; ppn < plast; ppn++) {
1528 			if (page_numtopp_nolock(ppn))
1529 				continue;
1530 			(void) cpr_setbit(ppn, mapflag);
1531 			pcnt++;
1532 		}
1533 	}
1534 	memlist_read_unlock();
1535 
1536 	/*
1537 	 * clear bits for phys pages in each segment
1538 	 */
1539 	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);
1540 
1541 	/*
1542 	 * set bits for phys pages referenced by the prom_ppages vnode;
1543 	 * these pages are mostly comprised of forthdebug words
1544 	 */
1545 	vcnt = 0;
1546 	for (pp = prom_ppages.v_pages; pp; ) {
1547 		if (cpr_setbit(pp->p_offset, mapflag) == 0)
1548 			vcnt++;
1549 		pp = pp->p_vpnext;
1550 		if (pp == prom_ppages.v_pages)
1551 			break;
1552 	}
1553 
1554 	/*
1555 	 * total number of prom pages are:
1556 	 * (non-page_t pages - seg pages + vnode pages)
1557 	 */
1558 	ppage_count = pcnt - scnt + vcnt;
1559 	CPR_DEBUG(CPR_DEBUG1,
1560 	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1561 	    pcnt, scnt, vcnt, ppage_count);
1562 
1563 	/*
1564 	 * alloc array of pfn_t to store phys page list
1565 	 */
1566 	pphys_list_size = ppage_count * sizeof (pfn_t);
1567 	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
1568 	if (pphys_list == NULL) {
1569 		cpr_err(CE_WARN, "cannot alloc pphys_list");
1570 		return (ENOMEM);
1571 	}
1572 
1573 	/*
1574 	 * phys pages referenced in the bitmap should be
1575 	 * those used by the prom; scan bitmap and save
1576 	 * a list of prom phys page numbers
1577 	 */
1578 	dst = pphys_list;
1579 	memlist_read_lock();
1580 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1581 		npages = mmu_btop(pmem->size);
1582 		ppn = mmu_btop(pmem->address);
1583 		for (plast = ppn + npages; ppn < plast; ppn++) {
1584 			if (cpr_isset(ppn, mapflag)) {
1585 				ASSERT(dst < (pphys_list + ppage_count));
1586 				*dst++ = ppn;
1587 			}
1588 		}
1589 	}
1590 	memlist_read_unlock();
1591 
1592 	/*
1593 	 * allocate space to store prom pages
1594 	 */
1595 	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
1596 	if (ppage_buf == NULL) {
1597 		kmem_free(pphys_list, pphys_list_size);
1598 		pphys_list = NULL;
1599 		cpr_err(CE_WARN, "cannot alloc ppage_buf");
1600 		return (ENOMEM);
1601 	}
1602 
1603 	return (0);
1604 }
1605 
1606 
1607 /*
1608  * save prom pages to kmem pages
1609  */
1610 static void
1611 i_cpr_save_ppages(void)
1612 {
1613 	pfn_t *pphys, *plast;
1614 	caddr_t dst;
1615 
1616 	/*
1617 	 * map in each prom page and copy to a kmem page
1618 	 */
1619 	dst = ppage_buf;
1620 	plast = pphys_list + ppage_count;
1621 	for (pphys = pphys_list; pphys < plast; pphys++) {
1622 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1623 		bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1624 		i_cpr_mapout(cpr_vaddr, 1);
1625 		dst += MMU_PAGESIZE;
1626 	}
1627 
1628 	CPR_DEBUG(CPR_DEBUG1, "saved %ld prom pages\n", ppage_count);
1629 }
1630 
1631 
1632 /*
1633  * restore prom pages from kmem pages
1634  */
1635 static void
1636 i_cpr_restore_ppages(void)
1637 {
1638 	pfn_t *pphys, *plast;
1639 	caddr_t src;
1640 
1641 	dcache_flushall();
1642 
1643 	/*
1644 	 * map in each prom page and copy from a kmem page
1645 	 */
1646 	src = ppage_buf;
1647 	plast = pphys_list + ppage_count;
1648 	for (pphys = pphys_list; pphys < plast; pphys++) {
1649 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1650 		bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1651 		i_cpr_mapout(cpr_vaddr, 1);
1652 		src += MMU_PAGESIZE;
1653 	}
1654 
1655 	dcache_flushall();
1656 
1657 	CPR_DEBUG(CPR_DEBUG1, "restored %ld prom pages\n", ppage_count);
1658 }
1659 
1660 
1661 /*
1662  * save/restore prom pages or free related allocs
1663  */
1664 int
1665 i_cpr_prom_pages(int action)
1666 {
1667 	int error;
1668 
1669 	if (action == CPR_PROM_SAVE) {
1670 		if (ppage_buf == NULL) {
1671 			ASSERT(pphys_list == NULL);
1672 			if (error = i_cpr_find_ppages())
1673 				return (error);
1674 			i_cpr_save_ppages();
1675 		}
1676 	} else if (action == CPR_PROM_RESTORE) {
1677 		i_cpr_restore_ppages();
1678 	} else if (action == CPR_PROM_FREE) {
1679 		if (pphys_list) {
1680 			ASSERT(pphys_list_size);
1681 			kmem_free(pphys_list, pphys_list_size);
1682 			pphys_list = NULL;
1683 			pphys_list_size = 0;
1684 		}
1685 		if (ppage_buf) {
1686 			ASSERT(ppage_count);
1687 			kmem_free(ppage_buf, mmu_ptob(ppage_count));
1688 			CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
1689 			    ppage_count);
1690 			ppage_buf = NULL;
1691 			ppage_count = 0;
1692 		}
1693 	}
1694 	return (0);
1695 }
1696 
1697 
/*
 * record tlb data for the nucleus, bigktsb's, and the cpr module;
 * this data is later used by cprboot to install dtlb/itlb entries.
 * when we jump into the cpr module during the resume phase, those
 * mappings are needed until switching to the kernel trap table.
 * to make the dtte/itte info available during resume, we need
 * the info recorded prior to saving sensitive pages, otherwise
 * all the data would appear as NULLs.
 *
 * the results are stored in m_info.dtte[] and m_info.itte[]; a single
 * cti_t scan descriptor is filled in for the dtlb pass and then
 * partially re-filled for the itlb pass.
 */
static void
i_cpr_save_tlbinfo(void)
{
	/* zero-filled, so cti.skip starts out as 0 for the |= below */
	cti_t cti = {0};

	/*
	 * during resume - shortly after jumping into the cpr module,
	 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
	 * index used for TSBs; skip is set so that any saved tte will
	 * target other tlb offsets and prevent being lost during
	 * resume.  now scan the dtlb and save locked entries,
	 * then add entries for the tmp stack / data page and the
	 * cpr thread structure.
	 */
	cti.dst = m_info.dtte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = dtlb_rd_entry;
	cti.writer = NULL;
	cti.filter = i_cpr_lnb;
	/* start from the highest dtlb index of the current cpu */
	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;

	/*
	 * NOTE(review): -1 presumably means "no dtlb index assigned"
	 * for the corresponding TSB; confirm where these are set.
	 */
	if (utsb_dtlb_ttenum != -1)
		cti.skip = (1 << utsb_dtlb_ttenum);

	if (utsb4m_dtlb_ttenum != -1)
		cti.skip |= (1 << utsb4m_dtlb_ttenum);

	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
	i_cpr_make_tte(&cti, curthread, datava);

	/*
	 * scan itlb and save locked entries; add an entry for
	 * the first text page of the cpr module; cprboot will
	 * jump to that page after restoring kernel pages.
	 *
	 * writer and filter are left as set for the dtlb pass;
	 * skip is cleared for the itlb pass.
	 */
	cti.dst = m_info.itte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = itlb_rd_entry;
	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
	cti.skip = 0;
	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
}
1751 
1752 
1753 /* ARGSUSED */
1754 int
1755 i_cpr_dump_setup(vnode_t *vp)
1756 {
1757 	/*
1758 	 * zero out m_info and add info to dtte/itte arrays
1759 	 */
1760 	bzero(&m_info, sizeof (m_info));
1761 	i_cpr_save_tlbinfo();
1762 	return (0);
1763 }
1764 
1765 
1766 int
1767 i_cpr_is_supported(int sleeptype)
1768 {
1769 	char es_prop[] = "energystar-v2";
1770 	pnode_t node;
1771 	int last;
1772 	extern int cpr_supported_override;
1773 	extern int cpr_platform_enable;
1774 
1775 	if (sleeptype != CPR_TODISK)
1776 		return (0);
1777 
1778 	/*
1779 	 * The next statement tests if a specific platform has turned off
1780 	 * cpr support.
1781 	 */
1782 	if (cpr_supported_override)
1783 		return (0);
1784 
1785 	/*
1786 	 * Do not inspect energystar-v* property if a platform has
1787 	 * specifically turned on cpr support
1788 	 */
1789 	if (cpr_platform_enable)
1790 		return (1);
1791 
1792 	node = prom_rootnode();
1793 	if (prom_getproplen(node, es_prop) != -1)
1794 		return (1);
1795 	last = strlen(es_prop) - 1;
1796 	es_prop[last] = '3';
1797 	return (prom_getproplen(node, es_prop) != -1);
1798 }
1799 
1800 
1801 /*
1802  * the actual size of the statefile data isn't known until after all the
1803  * compressed pages are written; even the inode size doesn't reflect the
1804  * data size since there are usually many extra fs blocks.  for recording
1805  * the actual data size, the first sector of the statefile is copied to
1806  * a tmp buf, and the copy is later updated and flushed to disk.
1807  */
1808 int
1809 i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
1810 {
1811 	extern int cpr_flush_write(vnode_t *);
1812 	static char cpr_sector[DEV_BSIZE];
1813 	cpr_ext bytes, *dst;
1814 
1815 	/*
1816 	 * this routine is called after cdd_t and csu_md_t are copied
1817 	 * to cpr_buf; mini-hack alert: the save/update method creates
1818 	 * a dependency on the combined struct size being >= one sector
1819 	 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1820 	 * over 1K bytes and will probably grow with any changes.
1821 	 *
1822 	 * copy when vp is NULL, flush when non-NULL
1823 	 */
1824 	if (vp == NULL) {
1825 		ASSERT((*bufpp - base) >= DEV_BSIZE);
1826 		bcopy(base, cpr_sector, sizeof (cpr_sector));
1827 		return (0);
1828 	} else {
1829 		bytes = dbtob(*blkno);
1830 		dst = &((cdd_t *)cpr_sector)->cdd_filesize;
1831 		bcopy(&bytes, dst, sizeof (bytes));
1832 		bcopy(cpr_sector, base, sizeof (cpr_sector));
1833 		*bufpp = base + sizeof (cpr_sector);
1834 		*blkno = cpr_statefile_offset();
1835 		CPR_DEBUG(CPR_DEBUG1, "statefile data size: %ld\n\n", bytes);
1836 		return (cpr_flush_write(vp));
1837 	}
1838 }
1839 
1840 
1841 /*
1842  * Allocate bitmaps according to the phys_install list.
1843  */
1844 static int
1845 i_cpr_bitmap_setup(void)
1846 {
1847 	struct memlist *pmem;
1848 	cbd_t *dp, *tail;
1849 	void *space;
1850 	size_t size;
1851 
1852 	/*
1853 	 * The number of bitmap descriptors will be the count of
1854 	 * phys_install ranges plus 1 for a trailing NULL struct.
1855 	 */
1856 	cpr_nbitmaps = 1;
1857 	for (pmem = phys_install; pmem; pmem = pmem->next)
1858 		cpr_nbitmaps++;
1859 
1860 	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
1861 		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
1862 		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
1863 		return (EFBIG);
1864 	}
1865 
1866 	/* Alloc an array of bitmap descriptors. */
1867 	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
1868 	if (dp == NULL) {
1869 		cpr_nbitmaps = 0;
1870 		return (ENOMEM);
1871 	}
1872 	tail = dp + cpr_nbitmaps;
1873 
1874 	CPR->c_bmda = dp;
1875 	for (pmem = phys_install; pmem; pmem = pmem->next) {
1876 		size = BITMAP_BYTES(pmem->size);
1877 		space = kmem_zalloc(size * 2, KM_NOSLEEP);
1878 		if (space == NULL)
1879 			return (ENOMEM);
1880 		ASSERT(dp < tail);
1881 		dp->cbd_magic = CPR_BITMAP_MAGIC;
1882 		dp->cbd_spfn = mmu_btop(pmem->address);
1883 		dp->cbd_epfn = mmu_btop(pmem->address + pmem->size) - 1;
1884 		dp->cbd_size = size;
1885 		dp->cbd_reg_bitmap = (cpr_ptr)space;
1886 		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
1887 		dp++;
1888 	}
1889 
1890 	/* set magic for the last descriptor */
1891 	ASSERT(dp == (tail - 1));
1892 	dp->cbd_magic = CPR_BITMAP_MAGIC;
1893 
1894 	return (0);
1895 }
1896 
1897 
1898 void
1899 i_cpr_bitmap_cleanup(void)
1900 {
1901 	cbd_t *dp;
1902 
1903 	if (CPR->c_bmda == NULL)
1904 		return;
1905 	for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1906 		kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1907 	kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1908 	CPR->c_bmda = NULL;
1909 	cpr_nbitmaps = 0;
1910 }
1911 
1912 
/*
 * A "regular" and "volatile" bitmap are created for each range of
 * physical memory.  The volatile maps are used to count and track pages
 * susceptible to heap corruption - caused by drivers that allocate mem
 * during VOP_DUMP(); the regular maps are used for all the other non-
 * susceptible pages.  Before writing the bitmaps to the statefile,
 * each bitmap pair gets merged to simplify handling within cprboot.
 *
 * The setup runs with the memlist read lock held; if it fails, whatever
 * was allocated is released again and the error is returned.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int rc;

	memlist_read_lock();
	rc = i_cpr_bitmap_setup();
	memlist_read_unlock();

	if (rc == 0)
		return (0);

	i_cpr_bitmap_cleanup();
	return (rc);
}
1933 
1934 
1935 
1936 /*
1937  * Power down the system.
1938  */
1939 int
1940 i_cpr_power_down(int sleeptype)
1941 {
1942 	int is_defined = 0;
1943 	char *wordexists = "p\" power-off\" find nip swap l! ";
1944 	char *req = "power-off";
1945 
1946 	ASSERT(sleeptype == CPR_TODISK);
1947 
1948 	/*
1949 	 * is_defined has value -1 when defined
1950 	 */
1951 	prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0);
1952 	if (is_defined) {
1953 		CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req);
1954 		prom_interpret(req, 0, 0, 0, 0, 0);
1955 	}
1956 	/*
1957 	 * Only returns if failed
1958 	 */
1959 	return (EIO);
1960 }
1961 
/*
 * Platform hook for stopping the other cpus; simply hands the request
 * on to stop_other_cpus().
 */
void
i_cpr_stop_other_cpus(void)
{
	stop_other_cpus();
}
1967 
/*
 * Save context for the specified CPU.
 * Not implemented yet: calling it trips the ASSERT, and it returns
 * NULL.
 */
/* ARGSUSED */
void *
i_cpr_save_context(void *arg)
{
	/* not yet */
	ASSERT(0);

	return (NULL);
}
1981 
/*
 * Hook run before the cpus are resumed.
 * Not implemented yet: calling it trips the ASSERT.
 */
void
i_cpr_pre_resume_cpus(void)
{
	/* not yet */
	ASSERT(0);
}
1990 
/*
 * Hook run after the cpus are resumed.
 * Not implemented yet: calling it trips the ASSERT.
 */
void
i_cpr_post_resume_cpus(void)
{
	/* not yet */
	ASSERT(0);
}
1999 
/*
 * Hook for allocating per-cpu resources; there is nothing to do here,
 * so the body is intentionally empty.
 */
void
i_cpr_alloc_cpus(void)
{
	/* nothing to do */
}
2007 
/*
 * Hook for freeing per-cpu resources; there is nothing to do here,
 * so the body is intentionally empty.
 */
void
i_cpr_free_cpus(void)
{
	/* nothing to do */
}
2015 
2016 /* ARGSUSED */
2017 void
2018 i_cpr_save_configuration(dev_info_t *dip)
2019 {
2020 	/*
2021 	 * this is a no-op on sparc
2022 	 */
2023 }
2024 
2025 /* ARGSUSED */
2026 void
2027 i_cpr_restore_configuration(dev_info_t *dip)
2028 {
2029 	/*
2030 	 * this is a no-op on sparc
2031 	 */
2032 }
2033