xref: /titanic_51/usr/src/uts/sun4/os/machdep.c (revision ac4d633f367252125bb35e97c5725d2aa68c1291)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/kstat.h>
30 #include <sys/param.h>
31 #include <sys/stack.h>
32 #include <sys/regset.h>
33 #include <sys/thread.h>
34 #include <sys/proc.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/kmem.h>
37 #include <sys/cpuvar.h>
38 #include <sys/systm.h>
39 #include <sys/machpcb.h>
40 #include <sys/machasi.h>
41 #include <sys/vis.h>
42 #include <sys/fpu/fpusystm.h>
43 #include <sys/cpu_module.h>
44 #include <sys/privregs.h>
45 #include <sys/archsystm.h>
46 #include <sys/atomic.h>
47 #include <sys/cmn_err.h>
48 #include <sys/time.h>
49 #include <sys/clock.h>
50 #include <sys/chip.h>
51 #include <sys/cmp.h>
52 #include <sys/platform_module.h>
53 #include <sys/bl.h>
54 #include <sys/nvpair.h>
55 #include <sys/kdi_impl.h>
56 #include <sys/machsystm.h>
57 #include <sys/sysmacros.h>
58 #include <sys/promif.h>
59 #include <sys/pool_pset.h>
60 #include <vm/seg_kmem.h>
61 
62 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
63 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
64 
65 /*
66  * Initialize kernel thread's stack.
67  */
68 caddr_t
69 thread_stk_init(caddr_t stk)
70 {
71 	kfpu_t *fp;
72 	ulong_t align;
73 
74 	/* allocate extra space for floating point state */
75 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
76 	align = (uintptr_t)stk & 0x3f;
77 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
78 	fp = (kfpu_t *)stk;
79 	fp->fpu_fprs = 0;
80 
81 	stk -= SA(MINFRAME);
82 	return (stk);
83 }
84 
85 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
86 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
87 
88 kmem_cache_t	*wbuf32_cache;
89 kmem_cache_t	*wbuf64_cache;
90 
91 void
92 lwp_stk_cache_init(void)
93 {
94 	/*
95 	 * Window buffers are allocated from the static arena
96 	 * because they are accessed at TL>0. We also must use
97 	 * KMC_NOHASH to prevent them from straddling page
98 	 * boundaries as they are accessed by physical address.
99 	 */
100 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
101 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
102 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
103 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
104 }
105 
106 /*
107  * Initialize lwp's kernel stack.
108  * Note that now that the floating point register save area (kfpu_t)
109  * has been broken out from machpcb and aligned on a 64 byte boundary so that
110  * we can do block load/stores to/from it, there are a couple of potential
111  * optimizations to save stack space. 1. The floating point register save
112  * area could be aligned on a 16 byte boundary, and the floating point code
113  * changed to (a) check the alignment and (b) use different save/restore
114  * macros depending upon the alignment. 2. The lwp_stk_init code below
115  * could be changed to calculate if less space would be wasted if machpcb
116  * was first instead of second. However there is a REGOFF macro used in
117  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
118  * register area is a fixed distance from the %sp, and would have to be
119  * changed to a pointer or something...JJ said later.
120  */
121 caddr_t
122 lwp_stk_init(klwp_t *lwp, caddr_t stk)
123 {
124 	struct machpcb *mpcb;
125 	kfpu_t *fp;
126 	uintptr_t aln;
127 
128 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
129 	aln = (uintptr_t)stk & 0x3F;
130 	stk -= aln;
131 	fp = (kfpu_t *)stk;
132 	stk -= SA(sizeof (struct machpcb));
133 	mpcb = (struct machpcb *)stk;
134 	bzero(mpcb, sizeof (struct machpcb));
135 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
136 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
137 	lwp->lwp_fpu = (void *)fp;
138 	mpcb->mpcb_fpu = fp;
139 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
140 	mpcb->mpcb_thread = lwp->lwp_thread;
141 	mpcb->mpcb_wbcnt = 0;
142 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
143 		mpcb->mpcb_wstate = WSTATE_USER32;
144 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
145 	} else {
146 		mpcb->mpcb_wstate = WSTATE_USER64;
147 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
148 	}
149 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
150 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
151 	mpcb->mpcb_pa = va_to_pa(mpcb);
152 	return (stk);
153 }
154 
155 void
156 lwp_stk_fini(klwp_t *lwp)
157 {
158 	struct machpcb *mpcb = lwptompcb(lwp);
159 
160 	/*
161 	 * there might be windows still in the wbuf due to unmapped
162 	 * stack, misaligned stack pointer, etc.  We just free it.
163 	 */
164 	mpcb->mpcb_wbcnt = 0;
165 	if (mpcb->mpcb_wstate == WSTATE_USER32)
166 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
167 	else
168 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
169 	mpcb->mpcb_wbuf = NULL;
170 	mpcb->mpcb_wbuf_pa = -1;
171 }
172 
173 
174 /*
175  * Copy regs from parent to child.
176  */
177 void
178 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
179 {
180 	kthread_t *t, *pt = lwptot(lwp);
181 	struct machpcb *mpcb = lwptompcb(clwp);
182 	struct machpcb *pmpcb = lwptompcb(lwp);
183 	kfpu_t *fp, *pfp = lwptofpu(lwp);
184 	caddr_t wbuf;
185 	uint_t wstate;
186 
187 	t = mpcb->mpcb_thread;
188 	/*
189 	 * remember child's fp and wbuf since they will get erased during
190 	 * the bcopy.
191 	 */
192 	fp = mpcb->mpcb_fpu;
193 	wbuf = mpcb->mpcb_wbuf;
194 	wstate = mpcb->mpcb_wstate;
195 	/*
196 	 * Don't copy mpcb_frame since we hand-crafted it
197 	 * in thread_load().
198 	 */
199 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
200 	mpcb->mpcb_thread = t;
201 	mpcb->mpcb_fpu = fp;
202 	fp->fpu_q = mpcb->mpcb_fpu_q;
203 
204 	/*
205 	 * It is theoretically possibly for the lwp's wstate to
206 	 * be different from its value assigned in lwp_stk_init,
207 	 * since lwp_stk_init assumed the data model of the process.
208 	 * Here, we took on the data model of the cloned lwp.
209 	 */
210 	if (mpcb->mpcb_wstate != wstate) {
211 		if (wstate == WSTATE_USER32) {
212 			kmem_cache_free(wbuf32_cache, wbuf);
213 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
214 			wstate = WSTATE_USER64;
215 		} else {
216 			kmem_cache_free(wbuf64_cache, wbuf);
217 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
218 			wstate = WSTATE_USER32;
219 		}
220 	}
221 
222 	mpcb->mpcb_pa = va_to_pa(mpcb);
223 	mpcb->mpcb_wbuf = wbuf;
224 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
225 
226 	ASSERT(mpcb->mpcb_wstate == wstate);
227 
228 	if (mpcb->mpcb_wbcnt != 0) {
229 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
230 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
231 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
232 	}
233 
234 	if (pt == curthread)
235 		pfp->fpu_fprs = _fp_read_fprs();
236 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
237 		if (pt == curthread && fpu_exists) {
238 			save_gsr(clwp->lwp_fpu);
239 		} else {
240 			uint64_t gsr;
241 			gsr = get_gsr(lwp->lwp_fpu);
242 			set_gsr(gsr, clwp->lwp_fpu);
243 		}
244 		fp_fork(lwp, clwp);
245 	}
246 }
247 
248 /*
249  * Free lwp fpu regs.
250  */
251 void
252 lwp_freeregs(klwp_t *lwp, int isexec)
253 {
254 	kfpu_t *fp = lwptofpu(lwp);
255 
256 	if (lwptot(lwp) == curthread)
257 		fp->fpu_fprs = _fp_read_fprs();
258 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
259 		fp_free(fp, isexec);
260 }
261 
262 /*
263  * fill in the extra register state area specified with the
264  * specified lwp's platform-dependent non-floating-point extra
265  * register state information
266  */
267 /* ARGSUSED */
268 void
269 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
270 {
271 	/* for sun4u nothing to do here, added for symmetry */
272 }
273 
274 /*
275  * fill in the extra register state area specified with the specified lwp's
276  * platform-dependent floating-point extra register state information.
277  * NOTE:  'lwp' might not correspond to 'curthread' since this is
278  * called from code in /proc to get the registers of another lwp.
279  */
280 void
281 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
282 {
283 	prxregset_t *xregs = (prxregset_t *)xrp;
284 	kfpu_t *fp = lwptofpu(lwp);
285 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
286 	uint64_t gsr;
287 
288 	/*
289 	 * fp_fksave() does not flush the GSR register into
290 	 * the lwp area, so do it now
291 	 */
292 	kpreempt_disable();
293 	if (ttolwp(curthread) == lwp && fpu_exists) {
294 		fp->fpu_fprs = _fp_read_fprs();
295 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
296 			_fp_write_fprs(fprs);
297 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
298 		}
299 		save_gsr(fp);
300 	}
301 	gsr = get_gsr(fp);
302 	kpreempt_enable();
303 	PRXREG_GSR(xregs) = gsr;
304 }
305 
306 /*
307  * set the specified lwp's platform-dependent non-floating-point
308  * extra register state based on the specified input
309  */
310 /* ARGSUSED */
311 void
312 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
313 {
314 	/* for sun4u nothing to do here, added for symmetry */
315 }
316 
317 /*
318  * set the specified lwp's platform-dependent floating-point
319  * extra register state based on the specified input
320  */
321 void
322 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
323 {
324 	prxregset_t *xregs = (prxregset_t *)xrp;
325 	kfpu_t *fp = lwptofpu(lwp);
326 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
327 	uint64_t gsr = PRXREG_GSR(xregs);
328 
329 	kpreempt_disable();
330 	set_gsr(gsr, lwptofpu(lwp));
331 
332 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
333 		fp->fpu_fprs = _fp_read_fprs();
334 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
335 			_fp_write_fprs(fprs);
336 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
337 		}
338 		restore_gsr(lwptofpu(lwp));
339 	}
340 	kpreempt_enable();
341 }
342 
343 /*
344  * fill in the sun4u asrs, ie, the lwp's platform-dependent
345  * non-floating-point extra register state information
346  */
347 /* ARGSUSED */
348 void
349 getasrs(klwp_t *lwp, asrset_t asr)
350 {
351 	/* for sun4u nothing to do here, added for symmetry */
352 }
353 
354 /*
355  * fill in the sun4u asrs, ie, the lwp's platform-dependent
356  * floating-point extra register state information
357  */
358 void
359 getfpasrs(klwp_t *lwp, asrset_t asr)
360 {
361 	kfpu_t *fp = lwptofpu(lwp);
362 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
363 
364 	kpreempt_disable();
365 	if (ttolwp(curthread) == lwp)
366 		fp->fpu_fprs = _fp_read_fprs();
367 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
368 		if (fpu_exists && ttolwp(curthread) == lwp) {
369 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
370 				_fp_write_fprs(fprs);
371 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
372 			}
373 			save_gsr(fp);
374 		}
375 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
376 	}
377 	kpreempt_enable();
378 }
379 
380 /*
381  * set the sun4u asrs, ie, the lwp's platform-dependent
382  * non-floating-point extra register state information
383  */
384 /* ARGSUSED */
385 void
386 setasrs(klwp_t *lwp, asrset_t asr)
387 {
388 	/* for sun4u nothing to do here, added for symmetry */
389 }
390 
391 void
392 setfpasrs(klwp_t *lwp, asrset_t asr)
393 {
394 	kfpu_t *fp = lwptofpu(lwp);
395 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
396 
397 	kpreempt_disable();
398 	if (ttolwp(curthread) == lwp)
399 		fp->fpu_fprs = _fp_read_fprs();
400 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
401 		set_gsr(asr[ASR_GSR], fp);
402 		if (fpu_exists && ttolwp(curthread) == lwp) {
403 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
404 				_fp_write_fprs(fprs);
405 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
406 			}
407 			restore_gsr(fp);
408 		}
409 	}
410 	kpreempt_enable();
411 }
412 
413 /*
414  * Create interrupt kstats for this CPU.
415  */
416 void
417 cpu_create_intrstat(cpu_t *cp)
418 {
419 	int		i;
420 	kstat_t		*intr_ksp;
421 	kstat_named_t	*knp;
422 	char		name[KSTAT_STRLEN];
423 	zoneid_t	zoneid;
424 
425 	ASSERT(MUTEX_HELD(&cpu_lock));
426 
427 	if (pool_pset_enabled())
428 		zoneid = GLOBAL_ZONEID;
429 	else
430 		zoneid = ALL_ZONES;
431 
432 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
433 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
434 
435 	/*
436 	 * Initialize each PIL's named kstat
437 	 */
438 	if (intr_ksp != NULL) {
439 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
440 		knp = (kstat_named_t *)intr_ksp->ks_data;
441 		intr_ksp->ks_private = cp;
442 		for (i = 0; i < PIL_MAX; i++) {
443 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
444 			    i + 1);
445 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
446 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
447 			    i + 1);
448 			kstat_named_init(&knp[(i * 2) + 1], name,
449 			    KSTAT_DATA_UINT64);
450 		}
451 		kstat_install(intr_ksp);
452 	}
453 }
454 
455 /*
456  * Delete interrupt kstats for this CPU.
457  */
458 void
459 cpu_delete_intrstat(cpu_t *cp)
460 {
461 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
462 }
463 
464 /*
465  * Convert interrupt statistics from CPU ticks to nanoseconds and
466  * update kstat.
467  */
468 int
469 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
470 {
471 	kstat_named_t	*knp = ksp->ks_data;
472 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
473 	int		i;
474 
475 	if (rw == KSTAT_WRITE)
476 		return (EACCES);
477 
478 	/*
479 	 * We use separate passes to copy and convert the statistics to
480 	 * nanoseconds. This assures that the snapshot of the data is as
481 	 * self-consistent as possible.
482 	 */
483 
484 	for (i = 0; i < PIL_MAX; i++) {
485 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
486 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
487 	}
488 
489 	for (i = 0; i < PIL_MAX; i++) {
490 		knp[i * 2].value.ui64 =
491 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
492 			cpup->cpu_id);
493 	}
494 
495 	return (0);
496 }
497 
498 /*
499  * Called by common/os/cpu.c for psrinfo(1m) kstats
500  */
501 char *
502 cpu_fru_fmri(cpu_t *cp)
503 {
504 	return (cpunodes[cp->cpu_id].fru_fmri);
505 }
506 
507 /*
508  * An interrupt thread is ending a time slice, so compute the interval it
509  * ran for and update the statistic for its PIL.
510  */
511 void
512 cpu_intr_swtch_enter(kthread_id_t t)
513 {
514 	uint64_t	interval;
515 	uint64_t	start;
516 	cpu_t		*cpu;
517 
518 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
519 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
520 
521 	/*
522 	 * We could be here with a zero timestamp. This could happen if:
523 	 * an interrupt thread which no longer has a pinned thread underneath
524 	 * it (i.e. it blocked at some point in its past) has finished running
525 	 * its handler. intr_thread() updated the interrupt statistic for its
526 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
527 	 * return to, swtch() gets called and we end up here.
528 	 *
529 	 * It can also happen if an interrupt thread in intr_thread() calls
530 	 * preempt. It will have already taken care of updating stats. In
531 	 * this event, the interrupt thread will be runnable.
532 	 */
533 	if (t->t_intr_start) {
534 		do {
535 			start = t->t_intr_start;
536 			interval = gettick_counter() - start;
537 		} while (cas64(&t->t_intr_start, start, 0) != start);
538 		cpu = CPU;
539 		if (cpu->cpu_m.divisor > 1)
540 			interval *= cpu->cpu_m.divisor;
541 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
542 
543 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
544 		    interval);
545 	} else
546 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
547 }
548 
549 
550 /*
551  * An interrupt thread is returning from swtch(). Place a starting timestamp
552  * in its thread structure.
553  */
554 void
555 cpu_intr_swtch_exit(kthread_id_t t)
556 {
557 	uint64_t ts;
558 
559 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
560 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
561 
562 	do {
563 		ts = t->t_intr_start;
564 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
565 }
566 
567 
568 int
569 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
570 {
571 	if (&plat_blacklist)
572 		return (plat_blacklist(cmd, scheme, fmri, class));
573 
574 	return (ENOTSUP);
575 }
576 
577 int
578 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
579 {
580 	extern void kdi_flush_caches(void);
581 	size_t nread = 0;
582 	uint32_t word;
583 	int slop, i;
584 
585 	kdi_flush_caches();
586 	membar_enter();
587 
588 	/* We might not begin on a word boundary. */
589 	if ((slop = addr & 3) != 0) {
590 		word = ldphys(addr & ~3);
591 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
592 			*buf++ = ((uchar_t *)&word)[i];
593 		addr = roundup(addr, 4);
594 	}
595 
596 	while (nbytes > 0) {
597 		word = ldphys(addr);
598 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
599 			*buf++ = ((uchar_t *)&word)[i];
600 	}
601 
602 	kdi_flush_caches();
603 
604 	*ncopiedp = nread;
605 	return (0);
606 }
607 
608 int
609 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
610 {
611 	extern void kdi_flush_caches(void);
612 	size_t nwritten = 0;
613 	uint32_t word;
614 	int slop, i;
615 
616 	kdi_flush_caches();
617 
618 	/* We might not begin on a word boundary. */
619 	if ((slop = addr & 3) != 0) {
620 		word = ldphys(addr & ~3);
621 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
622 			((uchar_t *)&word)[i] = *buf++;
623 		stphys(addr & ~3, word);
624 		addr = roundup(addr, 4);
625 	}
626 
627 	while (nbytes > 3) {
628 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
629 			((uchar_t *)&word)[i] = *buf++;
630 		stphys(addr, word);
631 		addr += 4;
632 	}
633 
634 	/* We might not end with a whole word. */
635 	if (nbytes > 0) {
636 		word = ldphys(addr);
637 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
638 			((uchar_t *)&word)[i] = *buf++;
639 		stphys(addr, word);
640 	}
641 
642 	membar_enter();
643 	kdi_flush_caches();
644 
645 	*ncopiedp = nwritten;
646 	return (0);
647 }
648 
649 static void
650 kdi_kernpanic(struct regs *regs, uint_t tt)
651 {
652 	sync_reg_buf = *regs;
653 	sync_tt = tt;
654 
655 	sync_handler();
656 }
657 
658 static void
659 kdi_plat_call(void (*platfn)(void))
660 {
661 	if (platfn != NULL) {
662 		prom_suspend_prepost();
663 		platfn();
664 		prom_resume_prepost();
665 	}
666 }
667 
668 void
669 mach_kdi_init(kdi_t *kdi)
670 {
671 	kdi->kdi_plat_call = kdi_plat_call;
672 	kdi->mkdi_cpu_index = kdi_cpu_index;
673 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
674 	kdi->mkdi_kernpanic = kdi_kernpanic;
675 }
676 
677 
678 /*
679  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
680  * long, and it fills in the array with the time spent on cpu in
681  * each of the mstates, where time is returned in nsec.
682  *
683  * No guarantee is made that the returned values in times[] will
684  * monotonically increase on sequential calls, although this will
685  * be true in the long run. Any such guarantee must be handled by
686  * the caller, if needed. This can happen if we fail to account
687  * for elapsed time due to a generation counter conflict, yet we
688  * did account for it on a prior call (see below).
689  *
690  * The complication is that the cpu in question may be updating
691  * its microstate at the same time that we are reading it.
692  * Because the microstate is only updated when the CPU's state
693  * changes, the values in cpu_intracct[] can be indefinitely out
694  * of date. To determine true current values, it is necessary to
695  * compare the current time with cpu_mstate_start, and add the
696  * difference to times[cpu_mstate].
697  *
698  * This can be a problem if those values are changing out from
699  * under us. Because the code path in new_cpu_mstate() is
700  * performance critical, we have not added a lock to it. Instead,
701  * we have added a generation counter. Before beginning
702  * modifications, the counter is set to 0. After modifications,
703  * it is set to the old value plus one.
704  *
705  * get_cpu_mstate() will not consider the values of cpu_mstate
706  * and cpu_mstate_start to be usable unless the value of
707  * cpu_mstate_gen is both non-zero and unchanged, both before and
708  * after reading the mstate information. Note that we must
709  * protect against out-of-order loads around accesses to the
710  * generation counter. Also, this is a best effort approach in
711  * that we do not retry should the counter be found to have
712  * changed.
713  *
714  * cpu_intracct[] is used to identify time spent in each CPU
715  * mstate while handling interrupts. Such time should be reported
716  * against system time, and so is subtracted out from its
717  * corresponding cpu_acct[] time and added to
718  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
719  * %ticks, but acct time may be stored as %sticks, thus requiring
720  * different conversions before they can be compared.
721  */
722 
723 void
724 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
725 {
726 	int i;
727 	hrtime_t now, start;
728 	uint16_t gen;
729 	uint16_t state;
730 	hrtime_t intracct[NCMSTATES];
731 
732 	/*
733 	 * Load all volatile state under the protection of membar.
734 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
735 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
736 	 * arrives after we make our last check of cpu_mstate_gen.
737 	 */
738 
739 	now = gethrtime_unscaled();
740 	gen = cpu->cpu_mstate_gen;
741 
742 	membar_consumer();	/* guarantee load ordering */
743 	start = cpu->cpu_mstate_start;
744 	state = cpu->cpu_mstate;
745 	for (i = 0; i < NCMSTATES; i++) {
746 		intracct[i] = cpu->cpu_intracct[i];
747 		times[i] = cpu->cpu_acct[i];
748 	}
749 	membar_consumer();	/* guarantee load ordering */
750 
751 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
752 		times[state] += now - start;
753 
754 	for (i = 0; i < NCMSTATES; i++) {
755 		scalehrtime(&times[i]);
756 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
757 	}
758 
759 	for (i = 0; i < NCMSTATES; i++) {
760 		if (i == CMS_SYSTEM)
761 			continue;
762 		times[i] -= intracct[i];
763 		if (times[i] < 0) {
764 			intracct[i] += times[i];
765 			times[i] = 0;
766 		}
767 		times[CMS_SYSTEM] += intracct[i];
768 	}
769 }
770