xref: /titanic_50/usr/src/uts/sun4/os/machdep.c (revision 275c9da86e89f8abf71135cf63d9fc23671b2e60)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/kstat.h>
30 #include <sys/param.h>
31 #include <sys/stack.h>
32 #include <sys/regset.h>
33 #include <sys/thread.h>
34 #include <sys/proc.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/kmem.h>
37 #include <sys/cpuvar.h>
38 #include <sys/systm.h>
39 #include <sys/machpcb.h>
40 #include <sys/machasi.h>
41 #include <sys/vis.h>
42 #include <sys/fpu/fpusystm.h>
43 #include <sys/cpu_module.h>
44 #include <sys/privregs.h>
45 #include <sys/archsystm.h>
46 #include <sys/atomic.h>
47 #include <sys/cmn_err.h>
48 #include <sys/time.h>
49 #include <sys/clock.h>
50 #include <sys/cmp.h>
51 #include <sys/platform_module.h>
52 #include <sys/bl.h>
53 #include <sys/nvpair.h>
54 #include <sys/kdi_impl.h>
55 #include <sys/machsystm.h>
56 #include <sys/sysmacros.h>
57 #include <sys/promif.h>
58 #include <sys/pool_pset.h>
59 #include <sys/mem.h>
60 #include <sys/dumphdr.h>
61 #include <vm/seg_kmem.h>
62 #include <sys/hold_page.h>
63 #include <sys/cpu.h>
64 
65 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
66 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
67 
68 /*
69  * Initialize kernel thread's stack.
70  */
71 caddr_t
72 thread_stk_init(caddr_t stk)
73 {
74 	kfpu_t *fp;
75 	ulong_t align;
76 
77 	/* allocate extra space for floating point state */
78 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
79 	align = (uintptr_t)stk & 0x3f;
80 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
81 	fp = (kfpu_t *)stk;
82 	fp->fpu_fprs = 0;
83 
84 	stk -= SA(MINFRAME);
85 	return (stk);
86 }
87 
88 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
89 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
90 
91 kmem_cache_t	*wbuf32_cache;
92 kmem_cache_t	*wbuf64_cache;
93 
94 void
95 lwp_stk_cache_init(void)
96 {
97 	/*
98 	 * Window buffers are allocated from the static arena
99 	 * because they are accessed at TL>0. We also must use
100 	 * KMC_NOHASH to prevent them from straddling page
101 	 * boundaries as they are accessed by physical address.
102 	 */
103 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
104 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
105 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
106 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
107 }
108 
109 /*
110  * Initialize lwp's kernel stack.
111  * Note that now that the floating point register save area (kfpu_t)
112  * has been broken out from machpcb and aligned on a 64 byte boundary so that
113  * we can do block load/stores to/from it, there are a couple of potential
114  * optimizations to save stack space. 1. The floating point register save
115  * area could be aligned on a 16 byte boundary, and the floating point code
116  * changed to (a) check the alignment and (b) use different save/restore
117  * macros depending upon the alignment. 2. The lwp_stk_init code below
118  * could be changed to calculate if less space would be wasted if machpcb
119  * was first instead of second. However there is a REGOFF macro used in
120  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
121  * register area is a fixed distance from the %sp, and would have to be
122  * changed to a pointer or something...JJ said later.
123  */
124 caddr_t
125 lwp_stk_init(klwp_t *lwp, caddr_t stk)
126 {
127 	struct machpcb *mpcb;
128 	kfpu_t *fp;
129 	uintptr_t aln;
130 
131 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
132 	aln = (uintptr_t)stk & 0x3F;
133 	stk -= aln;
134 	fp = (kfpu_t *)stk;
135 	stk -= SA(sizeof (struct machpcb));
136 	mpcb = (struct machpcb *)stk;
137 	bzero(mpcb, sizeof (struct machpcb));
138 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
139 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
140 	lwp->lwp_fpu = (void *)fp;
141 	mpcb->mpcb_fpu = fp;
142 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
143 	mpcb->mpcb_thread = lwp->lwp_thread;
144 	mpcb->mpcb_wbcnt = 0;
145 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
146 		mpcb->mpcb_wstate = WSTATE_USER32;
147 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
148 	} else {
149 		mpcb->mpcb_wstate = WSTATE_USER64;
150 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
151 	}
152 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
153 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
154 	mpcb->mpcb_pa = va_to_pa(mpcb);
155 	return (stk);
156 }
157 
158 void
159 lwp_stk_fini(klwp_t *lwp)
160 {
161 	struct machpcb *mpcb = lwptompcb(lwp);
162 
163 	/*
164 	 * there might be windows still in the wbuf due to unmapped
165 	 * stack, misaligned stack pointer, etc.  We just free it.
166 	 */
167 	mpcb->mpcb_wbcnt = 0;
168 	if (mpcb->mpcb_wstate == WSTATE_USER32)
169 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
170 	else
171 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
172 	mpcb->mpcb_wbuf = NULL;
173 	mpcb->mpcb_wbuf_pa = -1;
174 }
175 
176 
177 /*
178  * Copy regs from parent to child.
179  */
180 void
181 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
182 {
183 	kthread_t *t, *pt = lwptot(lwp);
184 	struct machpcb *mpcb = lwptompcb(clwp);
185 	struct machpcb *pmpcb = lwptompcb(lwp);
186 	kfpu_t *fp, *pfp = lwptofpu(lwp);
187 	caddr_t wbuf;
188 	uint_t wstate;
189 
190 	t = mpcb->mpcb_thread;
191 	/*
192 	 * remember child's fp and wbuf since they will get erased during
193 	 * the bcopy.
194 	 */
195 	fp = mpcb->mpcb_fpu;
196 	wbuf = mpcb->mpcb_wbuf;
197 	wstate = mpcb->mpcb_wstate;
198 	/*
199 	 * Don't copy mpcb_frame since we hand-crafted it
200 	 * in thread_load().
201 	 */
202 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
203 	mpcb->mpcb_thread = t;
204 	mpcb->mpcb_fpu = fp;
205 	fp->fpu_q = mpcb->mpcb_fpu_q;
206 
207 	/*
208 	 * It is theoretically possibly for the lwp's wstate to
209 	 * be different from its value assigned in lwp_stk_init,
210 	 * since lwp_stk_init assumed the data model of the process.
211 	 * Here, we took on the data model of the cloned lwp.
212 	 */
213 	if (mpcb->mpcb_wstate != wstate) {
214 		if (wstate == WSTATE_USER32) {
215 			kmem_cache_free(wbuf32_cache, wbuf);
216 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
217 			wstate = WSTATE_USER64;
218 		} else {
219 			kmem_cache_free(wbuf64_cache, wbuf);
220 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
221 			wstate = WSTATE_USER32;
222 		}
223 	}
224 
225 	mpcb->mpcb_pa = va_to_pa(mpcb);
226 	mpcb->mpcb_wbuf = wbuf;
227 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
228 
229 	ASSERT(mpcb->mpcb_wstate == wstate);
230 
231 	if (mpcb->mpcb_wbcnt != 0) {
232 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
233 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
234 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
235 	}
236 
237 	if (pt == curthread)
238 		pfp->fpu_fprs = _fp_read_fprs();
239 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
240 		if (pt == curthread && fpu_exists) {
241 			save_gsr(clwp->lwp_fpu);
242 		} else {
243 			uint64_t gsr;
244 			gsr = get_gsr(lwp->lwp_fpu);
245 			set_gsr(gsr, clwp->lwp_fpu);
246 		}
247 		fp_fork(lwp, clwp);
248 	}
249 }
250 
251 /*
252  * Free lwp fpu regs.
253  */
254 void
255 lwp_freeregs(klwp_t *lwp, int isexec)
256 {
257 	kfpu_t *fp = lwptofpu(lwp);
258 
259 	if (lwptot(lwp) == curthread)
260 		fp->fpu_fprs = _fp_read_fprs();
261 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
262 		fp_free(fp, isexec);
263 }
264 
265 /*
266  * This function is currently unused on sparc.
267  */
268 /*ARGSUSED*/
269 void
270 lwp_attach_brand_hdlrs(klwp_t *lwp)
271 {}
272 
273 /*
274  * fill in the extra register state area specified with the
275  * specified lwp's platform-dependent non-floating-point extra
276  * register state information
277  */
278 /* ARGSUSED */
279 void
280 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
281 {
282 	/* for sun4u nothing to do here, added for symmetry */
283 }
284 
285 /*
286  * fill in the extra register state area specified with the specified lwp's
287  * platform-dependent floating-point extra register state information.
288  * NOTE:  'lwp' might not correspond to 'curthread' since this is
289  * called from code in /proc to get the registers of another lwp.
290  */
291 void
292 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
293 {
294 	prxregset_t *xregs = (prxregset_t *)xrp;
295 	kfpu_t *fp = lwptofpu(lwp);
296 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
297 	uint64_t gsr;
298 
299 	/*
300 	 * fp_fksave() does not flush the GSR register into
301 	 * the lwp area, so do it now
302 	 */
303 	kpreempt_disable();
304 	if (ttolwp(curthread) == lwp && fpu_exists) {
305 		fp->fpu_fprs = _fp_read_fprs();
306 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
307 			_fp_write_fprs(fprs);
308 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
309 		}
310 		save_gsr(fp);
311 	}
312 	gsr = get_gsr(fp);
313 	kpreempt_enable();
314 	PRXREG_GSR(xregs) = gsr;
315 }
316 
317 /*
318  * set the specified lwp's platform-dependent non-floating-point
319  * extra register state based on the specified input
320  */
321 /* ARGSUSED */
322 void
323 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
324 {
325 	/* for sun4u nothing to do here, added for symmetry */
326 }
327 
328 /*
329  * set the specified lwp's platform-dependent floating-point
330  * extra register state based on the specified input
331  */
332 void
333 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
334 {
335 	prxregset_t *xregs = (prxregset_t *)xrp;
336 	kfpu_t *fp = lwptofpu(lwp);
337 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
338 	uint64_t gsr = PRXREG_GSR(xregs);
339 
340 	kpreempt_disable();
341 	set_gsr(gsr, lwptofpu(lwp));
342 
343 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
344 		fp->fpu_fprs = _fp_read_fprs();
345 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
346 			_fp_write_fprs(fprs);
347 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
348 		}
349 		restore_gsr(lwptofpu(lwp));
350 	}
351 	kpreempt_enable();
352 }
353 
354 /*
355  * fill in the sun4u asrs, ie, the lwp's platform-dependent
356  * non-floating-point extra register state information
357  */
358 /* ARGSUSED */
359 void
360 getasrs(klwp_t *lwp, asrset_t asr)
361 {
362 	/* for sun4u nothing to do here, added for symmetry */
363 }
364 
365 /*
366  * fill in the sun4u asrs, ie, the lwp's platform-dependent
367  * floating-point extra register state information
368  */
369 void
370 getfpasrs(klwp_t *lwp, asrset_t asr)
371 {
372 	kfpu_t *fp = lwptofpu(lwp);
373 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
374 
375 	kpreempt_disable();
376 	if (ttolwp(curthread) == lwp)
377 		fp->fpu_fprs = _fp_read_fprs();
378 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
379 		if (fpu_exists && ttolwp(curthread) == lwp) {
380 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
381 				_fp_write_fprs(fprs);
382 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
383 			}
384 			save_gsr(fp);
385 		}
386 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
387 	}
388 	kpreempt_enable();
389 }
390 
391 /*
392  * set the sun4u asrs, ie, the lwp's platform-dependent
393  * non-floating-point extra register state information
394  */
395 /* ARGSUSED */
396 void
397 setasrs(klwp_t *lwp, asrset_t asr)
398 {
399 	/* for sun4u nothing to do here, added for symmetry */
400 }
401 
402 void
403 setfpasrs(klwp_t *lwp, asrset_t asr)
404 {
405 	kfpu_t *fp = lwptofpu(lwp);
406 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
407 
408 	kpreempt_disable();
409 	if (ttolwp(curthread) == lwp)
410 		fp->fpu_fprs = _fp_read_fprs();
411 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
412 		set_gsr(asr[ASR_GSR], fp);
413 		if (fpu_exists && ttolwp(curthread) == lwp) {
414 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
415 				_fp_write_fprs(fprs);
416 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
417 			}
418 			restore_gsr(fp);
419 		}
420 	}
421 	kpreempt_enable();
422 }
423 
424 /*
425  * Create interrupt kstats for this CPU.
426  */
427 void
428 cpu_create_intrstat(cpu_t *cp)
429 {
430 	int		i;
431 	kstat_t		*intr_ksp;
432 	kstat_named_t	*knp;
433 	char		name[KSTAT_STRLEN];
434 	zoneid_t	zoneid;
435 
436 	ASSERT(MUTEX_HELD(&cpu_lock));
437 
438 	if (pool_pset_enabled())
439 		zoneid = GLOBAL_ZONEID;
440 	else
441 		zoneid = ALL_ZONES;
442 
443 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
444 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
445 
446 	/*
447 	 * Initialize each PIL's named kstat
448 	 */
449 	if (intr_ksp != NULL) {
450 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
451 		knp = (kstat_named_t *)intr_ksp->ks_data;
452 		intr_ksp->ks_private = cp;
453 		for (i = 0; i < PIL_MAX; i++) {
454 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
455 			    i + 1);
456 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
457 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
458 			    i + 1);
459 			kstat_named_init(&knp[(i * 2) + 1], name,
460 			    KSTAT_DATA_UINT64);
461 		}
462 		kstat_install(intr_ksp);
463 	}
464 }
465 
466 /*
467  * Delete interrupt kstats for this CPU.
468  */
469 void
470 cpu_delete_intrstat(cpu_t *cp)
471 {
472 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
473 }
474 
475 /*
476  * Convert interrupt statistics from CPU ticks to nanoseconds and
477  * update kstat.
478  */
479 int
480 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
481 {
482 	kstat_named_t	*knp = ksp->ks_data;
483 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
484 	int		i;
485 
486 	if (rw == KSTAT_WRITE)
487 		return (EACCES);
488 
489 	/*
490 	 * We use separate passes to copy and convert the statistics to
491 	 * nanoseconds. This assures that the snapshot of the data is as
492 	 * self-consistent as possible.
493 	 */
494 
495 	for (i = 0; i < PIL_MAX; i++) {
496 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
497 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
498 	}
499 
500 	for (i = 0; i < PIL_MAX; i++) {
501 		knp[i * 2].value.ui64 =
502 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
503 		    cpup->cpu_id);
504 	}
505 
506 	return (0);
507 }
508 
509 /*
510  * Called by common/os/cpu.c for psrinfo(1m) kstats
511  */
512 char *
513 cpu_fru_fmri(cpu_t *cp)
514 {
515 	return (cpunodes[cp->cpu_id].fru_fmri);
516 }
517 
518 /*
519  * An interrupt thread is ending a time slice, so compute the interval it
520  * ran for and update the statistic for its PIL.
521  */
522 void
523 cpu_intr_swtch_enter(kthread_id_t t)
524 {
525 	uint64_t	interval;
526 	uint64_t	start;
527 	cpu_t		*cpu;
528 
529 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
530 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
531 
532 	/*
533 	 * We could be here with a zero timestamp. This could happen if:
534 	 * an interrupt thread which no longer has a pinned thread underneath
535 	 * it (i.e. it blocked at some point in its past) has finished running
536 	 * its handler. intr_thread() updated the interrupt statistic for its
537 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
538 	 * return to, swtch() gets called and we end up here.
539 	 *
540 	 * It can also happen if an interrupt thread in intr_thread() calls
541 	 * preempt. It will have already taken care of updating stats. In
542 	 * this event, the interrupt thread will be runnable.
543 	 */
544 	if (t->t_intr_start) {
545 		do {
546 			start = t->t_intr_start;
547 			interval = gettick_counter() - start;
548 		} while (cas64(&t->t_intr_start, start, 0) != start);
549 		cpu = CPU;
550 		if (cpu->cpu_m.divisor > 1)
551 			interval *= cpu->cpu_m.divisor;
552 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
553 
554 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
555 		    interval);
556 	} else
557 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
558 }
559 
560 
561 /*
562  * An interrupt thread is returning from swtch(). Place a starting timestamp
563  * in its thread structure.
564  */
565 void
566 cpu_intr_swtch_exit(kthread_id_t t)
567 {
568 	uint64_t ts;
569 
570 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
571 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
572 
573 	do {
574 		ts = t->t_intr_start;
575 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
576 }
577 
578 
579 int
580 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
581 {
582 	if (&plat_blacklist)
583 		return (plat_blacklist(cmd, scheme, fmri, class));
584 
585 	return (ENOTSUP);
586 }
587 
588 int
589 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
590 {
591 	extern void kdi_flush_caches(void);
592 	size_t nread = 0;
593 	uint32_t word;
594 	int slop, i;
595 
596 	kdi_flush_caches();
597 	membar_enter();
598 
599 	/* We might not begin on a word boundary. */
600 	if ((slop = addr & 3) != 0) {
601 		word = ldphys(addr & ~3);
602 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
603 			*buf++ = ((uchar_t *)&word)[i];
604 		addr = roundup(addr, 4);
605 	}
606 
607 	while (nbytes > 0) {
608 		word = ldphys(addr);
609 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
610 			*buf++ = ((uchar_t *)&word)[i];
611 	}
612 
613 	kdi_flush_caches();
614 
615 	*ncopiedp = nread;
616 	return (0);
617 }
618 
619 int
620 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
621 {
622 	extern void kdi_flush_caches(void);
623 	size_t nwritten = 0;
624 	uint32_t word;
625 	int slop, i;
626 
627 	kdi_flush_caches();
628 
629 	/* We might not begin on a word boundary. */
630 	if ((slop = addr & 3) != 0) {
631 		word = ldphys(addr & ~3);
632 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
633 			((uchar_t *)&word)[i] = *buf++;
634 		stphys(addr & ~3, word);
635 		addr = roundup(addr, 4);
636 	}
637 
638 	while (nbytes > 3) {
639 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
640 			((uchar_t *)&word)[i] = *buf++;
641 		stphys(addr, word);
642 		addr += 4;
643 	}
644 
645 	/* We might not end with a whole word. */
646 	if (nbytes > 0) {
647 		word = ldphys(addr);
648 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
649 			((uchar_t *)&word)[i] = *buf++;
650 		stphys(addr, word);
651 	}
652 
653 	membar_enter();
654 	kdi_flush_caches();
655 
656 	*ncopiedp = nwritten;
657 	return (0);
658 }
659 
660 static void
661 kdi_kernpanic(struct regs *regs, uint_t tt)
662 {
663 	sync_reg_buf = *regs;
664 	sync_tt = tt;
665 
666 	sync_handler();
667 }
668 
669 static void
670 kdi_plat_call(void (*platfn)(void))
671 {
672 	if (platfn != NULL) {
673 		prom_suspend_prepost();
674 		platfn();
675 		prom_resume_prepost();
676 	}
677 }
678 
679 void
680 mach_kdi_init(kdi_t *kdi)
681 {
682 	kdi->kdi_plat_call = kdi_plat_call;
683 	kdi->kdi_kmdb_enter = kmdb_enter;
684 	kdi->mkdi_cpu_index = kdi_cpu_index;
685 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
686 	kdi->mkdi_kernpanic = kdi_kernpanic;
687 }
688 
689 
690 /*
691  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
692  * long, and it fills in the array with the time spent on cpu in
693  * each of the mstates, where time is returned in nsec.
694  *
695  * No guarantee is made that the returned values in times[] will
696  * monotonically increase on sequential calls, although this will
697  * be true in the long run. Any such guarantee must be handled by
698  * the caller, if needed. This can happen if we fail to account
699  * for elapsed time due to a generation counter conflict, yet we
700  * did account for it on a prior call (see below).
701  *
702  * The complication is that the cpu in question may be updating
703  * its microstate at the same time that we are reading it.
704  * Because the microstate is only updated when the CPU's state
705  * changes, the values in cpu_intracct[] can be indefinitely out
706  * of date. To determine true current values, it is necessary to
707  * compare the current time with cpu_mstate_start, and add the
708  * difference to times[cpu_mstate].
709  *
710  * This can be a problem if those values are changing out from
711  * under us. Because the code path in new_cpu_mstate() is
712  * performance critical, we have not added a lock to it. Instead,
713  * we have added a generation counter. Before beginning
714  * modifications, the counter is set to 0. After modifications,
715  * it is set to the old value plus one.
716  *
717  * get_cpu_mstate() will not consider the values of cpu_mstate
718  * and cpu_mstate_start to be usable unless the value of
719  * cpu_mstate_gen is both non-zero and unchanged, both before and
720  * after reading the mstate information. Note that we must
721  * protect against out-of-order loads around accesses to the
722  * generation counter. Also, this is a best effort approach in
723  * that we do not retry should the counter be found to have
724  * changed.
725  *
726  * cpu_intracct[] is used to identify time spent in each CPU
727  * mstate while handling interrupts. Such time should be reported
728  * against system time, and so is subtracted out from its
729  * corresponding cpu_acct[] time and added to
730  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
731  * %ticks, but acct time may be stored as %sticks, thus requiring
732  * different conversions before they can be compared.
733  */
734 
735 void
736 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
737 {
738 	int i;
739 	hrtime_t now, start;
740 	uint16_t gen;
741 	uint16_t state;
742 	hrtime_t intracct[NCMSTATES];
743 
744 	/*
745 	 * Load all volatile state under the protection of membar.
746 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
747 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
748 	 * arrives after we make our last check of cpu_mstate_gen.
749 	 */
750 
751 	now = gethrtime_unscaled();
752 	gen = cpu->cpu_mstate_gen;
753 
754 	membar_consumer();	/* guarantee load ordering */
755 	start = cpu->cpu_mstate_start;
756 	state = cpu->cpu_mstate;
757 	for (i = 0; i < NCMSTATES; i++) {
758 		intracct[i] = cpu->cpu_intracct[i];
759 		times[i] = cpu->cpu_acct[i];
760 	}
761 	membar_consumer();	/* guarantee load ordering */
762 
763 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
764 		times[state] += now - start;
765 
766 	for (i = 0; i < NCMSTATES; i++) {
767 		scalehrtime(&times[i]);
768 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
769 	}
770 
771 	for (i = 0; i < NCMSTATES; i++) {
772 		if (i == CMS_SYSTEM)
773 			continue;
774 		times[i] -= intracct[i];
775 		if (times[i] < 0) {
776 			intracct[i] += times[i];
777 			times[i] = 0;
778 		}
779 		times[CMS_SYSTEM] += intracct[i];
780 	}
781 }
782 
783 void
784 mach_cpu_pause(volatile char *safe)
785 {
786 	/*
787 	 * This cpu is now safe.
788 	 */
789 	*safe = PAUSE_WAIT;
790 	membar_enter(); /* make sure stores are flushed */
791 
792 	/*
793 	 * Now we wait.  When we are allowed to continue, safe
794 	 * will be set to PAUSE_IDLE.
795 	 */
796 	while (*safe != PAUSE_IDLE)
797 		SMT_PAUSE();
798 }
799 
800 /*ARGSUSED*/
801 int
802 plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
803 {
804 	return (ENOTSUP);
805 }
806 
/*
 * Stub: does nothing and returns 0.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype; an empty () leaves the parameters unspecified and lets
 * calls with stray arguments go undiagnosed.
 */
int
dump_plat_addr(void)
{
	return (0);
}
812 
/*
 * Stub: does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype; an empty () leaves the parameters unspecified and lets
 * calls with stray arguments go undiagnosed.
 */
void
dump_plat_pfn(void)
{
}
817 
/*
 * Stub: ignores its argument and returns 0.
 */
/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
	int nothing = 0;

	return (nothing);
}
824 
825 /* ARGSUSED */
826 int
827 plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
828 {
829 	return (PLAT_HOLD_OK);
830 }
831 
832 /* ARGSUSED */
833 void
834 plat_release_page(page_t *pp)
835 {
836 }
837