xref: /titanic_50/usr/src/uts/sun4/os/machdep.c (revision 0e42dee69ed771bf604dd1789fca9d77b5bbe302)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/kstat.h>
30 #include <sys/param.h>
31 #include <sys/stack.h>
32 #include <sys/regset.h>
33 #include <sys/thread.h>
34 #include <sys/proc.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/kmem.h>
37 #include <sys/cpuvar.h>
38 #include <sys/systm.h>
39 #include <sys/machpcb.h>
40 #include <sys/machasi.h>
41 #include <sys/vis.h>
42 #include <sys/fpu/fpusystm.h>
43 #include <sys/cpu_module.h>
44 #include <sys/privregs.h>
45 #include <sys/archsystm.h>
46 #include <sys/atomic.h>
47 #include <sys/cmn_err.h>
48 #include <sys/time.h>
49 #include <sys/clock.h>
50 #include <sys/chip.h>
51 #include <sys/cmp.h>
52 #include <sys/platform_module.h>
53 #include <sys/bl.h>
54 #include <sys/nvpair.h>
55 #include <sys/kdi_impl.h>
56 #include <sys/machsystm.h>
57 #include <sys/sysmacros.h>
58 #include <sys/promif.h>
59 #include <sys/pool_pset.h>
60 #include <vm/seg_kmem.h>
61 
/*
 * Both limits are initialized to 16 MMU pages; the "128k" annotations
 * below correspond to an 8k MMU_PAGESIZE.
 * NOTE(review): the consumers of these tunables are outside this file.
 */
int maxphys = MMU_PAGESIZE * 16;	/* 128k */
int klustsize = MMU_PAGESIZE * 16;	/* 128k */
64 
65 /*
66  * Initialize kernel thread's stack.
67  */
68 caddr_t
69 thread_stk_init(caddr_t stk)
70 {
71 	kfpu_t *fp;
72 	ulong_t align;
73 
74 	/* allocate extra space for floating point state */
75 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
76 	align = (uintptr_t)stk & 0x3f;
77 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
78 	fp = (kfpu_t *)stk;
79 	fp->fpu_fprs = 0;
80 
81 	stk -= SA(MINFRAME);
82 	return (stk);
83 }
84 
85 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
86 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
87 
88 kmem_cache_t	*wbuf32_cache;
89 kmem_cache_t	*wbuf64_cache;
90 
91 void
92 lwp_stk_cache_init(void)
93 {
94 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
95 	    0, NULL, NULL, NULL, NULL, static_arena, 0);
96 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
97 	    0, NULL, NULL, NULL, NULL, static_arena, 0);
98 }
99 
100 /*
101  * Initialize lwp's kernel stack.
102  * Note that now that the floating point register save area (kfpu_t)
103  * has been broken out from machpcb and aligned on a 64 byte boundary so that
104  * we can do block load/stores to/from it, there are a couple of potential
105  * optimizations to save stack space. 1. The floating point register save
106  * area could be aligned on a 16 byte boundary, and the floating point code
107  * changed to (a) check the alignment and (b) use different save/restore
108  * macros depending upon the alignment. 2. The lwp_stk_init code below
109  * could be changed to calculate if less space would be wasted if machpcb
110  * was first instead of second. However there is a REGOFF macro used in
111  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
112  * register area is a fixed distance from the %sp, and would have to be
113  * changed to a pointer or something...JJ said later.
114  */
115 caddr_t
116 lwp_stk_init(klwp_t *lwp, caddr_t stk)
117 {
118 	struct machpcb *mpcb;
119 	kfpu_t *fp;
120 	uintptr_t aln;
121 
122 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
123 	aln = (uintptr_t)stk & 0x3F;
124 	stk -= aln;
125 	fp = (kfpu_t *)stk;
126 	stk -= SA(sizeof (struct machpcb));
127 	mpcb = (struct machpcb *)stk;
128 	bzero(mpcb, sizeof (struct machpcb));
129 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
130 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
131 	lwp->lwp_fpu = (void *)fp;
132 	mpcb->mpcb_fpu = fp;
133 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
134 	mpcb->mpcb_thread = lwp->lwp_thread;
135 	mpcb->mpcb_wbcnt = 0;
136 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
137 		mpcb->mpcb_wstate = WSTATE_USER32;
138 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
139 	} else {
140 		mpcb->mpcb_wstate = WSTATE_USER64;
141 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
142 	}
143 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
144 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
145 	mpcb->mpcb_pa = va_to_pa(mpcb);
146 	return (stk);
147 }
148 
149 void
150 lwp_stk_fini(klwp_t *lwp)
151 {
152 	struct machpcb *mpcb = lwptompcb(lwp);
153 
154 	/*
155 	 * there might be windows still in the wbuf due to unmapped
156 	 * stack, misaligned stack pointer, etc.  We just free it.
157 	 */
158 	mpcb->mpcb_wbcnt = 0;
159 	if (mpcb->mpcb_wstate == WSTATE_USER32)
160 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
161 	else
162 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
163 	mpcb->mpcb_wbuf = NULL;
164 	mpcb->mpcb_wbuf_pa = -1;
165 }
166 
167 
168 /*
169  * Copy regs from parent to child.
170  */
171 void
172 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
173 {
174 	kthread_t *t, *pt = lwptot(lwp);
175 	struct machpcb *mpcb = lwptompcb(clwp);
176 	struct machpcb *pmpcb = lwptompcb(lwp);
177 	kfpu_t *fp, *pfp = lwptofpu(lwp);
178 	caddr_t wbuf;
179 	uint_t wstate;
180 
181 	t = mpcb->mpcb_thread;
182 	/*
183 	 * remember child's fp and wbuf since they will get erased during
184 	 * the bcopy.
185 	 */
186 	fp = mpcb->mpcb_fpu;
187 	wbuf = mpcb->mpcb_wbuf;
188 	wstate = mpcb->mpcb_wstate;
189 	/*
190 	 * Don't copy mpcb_frame since we hand-crafted it
191 	 * in thread_load().
192 	 */
193 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
194 	mpcb->mpcb_thread = t;
195 	mpcb->mpcb_fpu = fp;
196 	fp->fpu_q = mpcb->mpcb_fpu_q;
197 
198 	/*
199 	 * It is theoretically possibly for the lwp's wstate to
200 	 * be different from its value assigned in lwp_stk_init,
201 	 * since lwp_stk_init assumed the data model of the process.
202 	 * Here, we took on the data model of the cloned lwp.
203 	 */
204 	if (mpcb->mpcb_wstate != wstate) {
205 		if (wstate == WSTATE_USER32) {
206 			kmem_cache_free(wbuf32_cache, wbuf);
207 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
208 			wstate = WSTATE_USER64;
209 		} else {
210 			kmem_cache_free(wbuf64_cache, wbuf);
211 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
212 			wstate = WSTATE_USER32;
213 		}
214 	}
215 
216 	mpcb->mpcb_pa = va_to_pa(mpcb);
217 	mpcb->mpcb_wbuf = wbuf;
218 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
219 
220 	ASSERT(mpcb->mpcb_wstate == wstate);
221 
222 	if (mpcb->mpcb_wbcnt != 0) {
223 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
224 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
225 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
226 	}
227 
228 	if (pt == curthread)
229 		pfp->fpu_fprs = _fp_read_fprs();
230 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
231 		if (pt == curthread && fpu_exists) {
232 			save_gsr(clwp->lwp_fpu);
233 		} else {
234 			uint64_t gsr;
235 			gsr = get_gsr(lwp->lwp_fpu);
236 			set_gsr(gsr, clwp->lwp_fpu);
237 		}
238 		fp_fork(lwp, clwp);
239 	}
240 }
241 
242 /*
243  * Free lwp fpu regs.
244  */
245 void
246 lwp_freeregs(klwp_t *lwp, int isexec)
247 {
248 	kfpu_t *fp = lwptofpu(lwp);
249 
250 	if (lwptot(lwp) == curthread)
251 		fp->fpu_fprs = _fp_read_fprs();
252 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
253 		fp_free(fp, isexec);
254 }
255 
/*
 * Fill in the extra register state area at 'xrp' with the specified
 * lwp's platform-dependent non-floating-point extra register state
 * information.
 *
 * This is deliberately a no-op: both arguments are ignored and nothing
 * is written through 'xrp'.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
267 
268 /*
269  * fill in the extra register state area specified with the specified lwp's
270  * platform-dependent floating-point extra register state information.
271  * NOTE:  'lwp' might not correspond to 'curthread' since this is
272  * called from code in /proc to get the registers of another lwp.
273  */
274 void
275 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
276 {
277 	prxregset_t *xregs = (prxregset_t *)xrp;
278 	kfpu_t *fp = lwptofpu(lwp);
279 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
280 	uint64_t gsr;
281 
282 	/*
283 	 * fp_fksave() does not flush the GSR register into
284 	 * the lwp area, so do it now
285 	 */
286 	kpreempt_disable();
287 	if (ttolwp(curthread) == lwp && fpu_exists) {
288 		fp->fpu_fprs = _fp_read_fprs();
289 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
290 			_fp_write_fprs(fprs);
291 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
292 		}
293 		save_gsr(fp);
294 	}
295 	gsr = get_gsr(fp);
296 	kpreempt_enable();
297 	PRXREG_GSR(xregs) = gsr;
298 }
299 
/*
 * Set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input.
 *
 * This is deliberately a no-op: both arguments are ignored and the lwp
 * is left untouched.
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
310 
311 /*
312  * set the specified lwp's platform-dependent floating-point
313  * extra register state based on the specified input
314  */
315 void
316 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
317 {
318 	prxregset_t *xregs = (prxregset_t *)xrp;
319 	kfpu_t *fp = lwptofpu(lwp);
320 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
321 	uint64_t gsr = PRXREG_GSR(xregs);
322 
323 	kpreempt_disable();
324 	set_gsr(gsr, lwptofpu(lwp));
325 
326 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
327 		fp->fpu_fprs = _fp_read_fprs();
328 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
329 			_fp_write_fprs(fprs);
330 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
331 		}
332 		restore_gsr(lwptofpu(lwp));
333 	}
334 	kpreempt_enable();
335 }
336 
/*
 * Fill in the sun4u asrs, i.e. the lwp's platform-dependent
 * non-floating-point extra register state information.
 *
 * This is deliberately a no-op: both arguments are ignored and 'asr'
 * is left unmodified.
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
347 
348 /*
349  * fill in the sun4u asrs, ie, the lwp's platform-dependent
350  * floating-point extra register state information
351  */
352 void
353 getfpasrs(klwp_t *lwp, asrset_t asr)
354 {
355 	kfpu_t *fp = lwptofpu(lwp);
356 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
357 
358 	kpreempt_disable();
359 	if (ttolwp(curthread) == lwp)
360 		fp->fpu_fprs = _fp_read_fprs();
361 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
362 		if (fpu_exists && ttolwp(curthread) == lwp) {
363 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
364 				_fp_write_fprs(fprs);
365 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
366 			}
367 			save_gsr(fp);
368 		}
369 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
370 	}
371 	kpreempt_enable();
372 }
373 
/*
 * Set the sun4u asrs, i.e. the lwp's platform-dependent
 * non-floating-point extra register state information.
 *
 * This is deliberately a no-op: both arguments are ignored and the lwp
 * is left untouched.
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
384 
385 void
386 setfpasrs(klwp_t *lwp, asrset_t asr)
387 {
388 	kfpu_t *fp = lwptofpu(lwp);
389 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
390 
391 	kpreempt_disable();
392 	if (ttolwp(curthread) == lwp)
393 		fp->fpu_fprs = _fp_read_fprs();
394 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
395 		set_gsr(asr[ASR_GSR], fp);
396 		if (fpu_exists && ttolwp(curthread) == lwp) {
397 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
398 				_fp_write_fprs(fprs);
399 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
400 			}
401 			restore_gsr(fp);
402 		}
403 	}
404 	kpreempt_enable();
405 }
406 
407 /*
408  * Create interrupt kstats for this CPU.
409  */
410 void
411 cpu_create_intrstat(cpu_t *cp)
412 {
413 	int		i;
414 	kstat_t		*intr_ksp;
415 	kstat_named_t	*knp;
416 	char		name[KSTAT_STRLEN];
417 	zoneid_t	zoneid;
418 
419 	ASSERT(MUTEX_HELD(&cpu_lock));
420 
421 	if (pool_pset_enabled())
422 		zoneid = GLOBAL_ZONEID;
423 	else
424 		zoneid = ALL_ZONES;
425 
426 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
427 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
428 
429 	/*
430 	 * Initialize each PIL's named kstat
431 	 */
432 	if (intr_ksp != NULL) {
433 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
434 		knp = (kstat_named_t *)intr_ksp->ks_data;
435 		intr_ksp->ks_private = cp;
436 		for (i = 0; i < PIL_MAX; i++) {
437 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
438 			    i + 1);
439 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
440 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
441 			    i + 1);
442 			kstat_named_init(&knp[(i * 2) + 1], name,
443 			    KSTAT_DATA_UINT64);
444 		}
445 		kstat_install(intr_ksp);
446 	}
447 }
448 
/*
 * Delete interrupt kstats for this CPU.
 *
 * Removes the kstat named "intrstat" of module "cpu" whose instance is
 * cp->cpu_id, looking it up with ALL_ZONES.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}
457 
458 /*
459  * Convert interrupt statistics from CPU ticks to nanoseconds and
460  * update kstat.
461  */
462 int
463 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
464 {
465 	kstat_named_t	*knp = ksp->ks_data;
466 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
467 	int		i;
468 
469 	if (rw == KSTAT_WRITE)
470 		return (EACCES);
471 
472 	/*
473 	 * We use separate passes to copy and convert the statistics to
474 	 * nanoseconds. This assures that the snapshot of the data is as
475 	 * self-consistent as possible.
476 	 */
477 
478 	for (i = 0; i < PIL_MAX; i++) {
479 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
480 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
481 	}
482 
483 	for (i = 0; i < PIL_MAX; i++) {
484 		knp[i * 2].value.ui64 =
485 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
486 			cpup->cpu_id);
487 	}
488 
489 	return (0);
490 }
491 
/*
 * Called by common/os/cpu.c for psrinfo(1m) kstats.
 *
 * Returns the fru_fmri member of the cpunodes[] entry indexed by the
 * cpu's id.  The pointer refers to that entry; nothing is copied.
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}
500 
501 /*
502  * An interrupt thread is ending a time slice, so compute the interval it
503  * ran for and update the statistic for its PIL.
504  */
505 void
506 cpu_intr_swtch_enter(kthread_id_t t)
507 {
508 	uint64_t	interval;
509 	uint64_t	start;
510 	cpu_t		*cpu;
511 
512 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
513 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
514 
515 	/*
516 	 * We could be here with a zero timestamp. This could happen if:
517 	 * an interrupt thread which no longer has a pinned thread underneath
518 	 * it (i.e. it blocked at some point in its past) has finished running
519 	 * its handler. intr_thread() updated the interrupt statistic for its
520 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
521 	 * return to, swtch() gets called and we end up here.
522 	 *
523 	 * It can also happen if an interrupt thread in intr_thread() calls
524 	 * preempt. It will have already taken care of updating stats. In
525 	 * this event, the interrupt thread will be runnable.
526 	 */
527 	if (t->t_intr_start) {
528 		do {
529 			start = t->t_intr_start;
530 			interval = gettick_counter() - start;
531 		} while (cas64(&t->t_intr_start, start, 0) != start);
532 		cpu = CPU;
533 		if (cpu->cpu_m.divisor > 1)
534 			interval *= cpu->cpu_m.divisor;
535 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
536 
537 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
538 		    interval);
539 	} else
540 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
541 }
542 
543 
544 /*
545  * An interrupt thread is returning from swtch(). Place a starting timestamp
546  * in its thread structure.
547  */
548 void
549 cpu_intr_swtch_exit(kthread_id_t t)
550 {
551 	uint64_t ts;
552 
553 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
554 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
555 
556 	do {
557 		ts = t->t_intr_start;
558 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
559 }
560 
561 
562 int
563 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
564 {
565 	if (&plat_blacklist)
566 		return (plat_blacklist(cmd, scheme, fmri, class));
567 
568 	return (ENOTSUP);
569 }
570 
571 int
572 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
573 {
574 	extern void kdi_flush_caches(void);
575 	size_t nread = 0;
576 	uint32_t word;
577 	int slop, i;
578 
579 	kdi_flush_caches();
580 	membar_enter();
581 
582 	/* We might not begin on a word boundary. */
583 	if ((slop = addr & 3) != 0) {
584 		word = ldphys(addr & ~3);
585 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
586 			*buf++ = ((uchar_t *)&word)[i];
587 		addr = roundup(addr, 4);
588 	}
589 
590 	while (nbytes > 0) {
591 		word = ldphys(addr);
592 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
593 			*buf++ = ((uchar_t *)&word)[i];
594 	}
595 
596 	kdi_flush_caches();
597 
598 	*ncopiedp = nread;
599 	return (0);
600 }
601 
602 int
603 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
604 {
605 	extern void kdi_flush_caches(void);
606 	size_t nwritten = 0;
607 	uint32_t word;
608 	int slop, i;
609 
610 	kdi_flush_caches();
611 
612 	/* We might not begin on a word boundary. */
613 	if ((slop = addr & 3) != 0) {
614 		word = ldphys(addr & ~3);
615 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
616 			((uchar_t *)&word)[i] = *buf++;
617 		stphys(addr & ~3, word);
618 		addr = roundup(addr, 4);
619 	}
620 
621 	while (nbytes > 3) {
622 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
623 			((uchar_t *)&word)[i] = *buf++;
624 		stphys(addr, word);
625 		addr += 4;
626 	}
627 
628 	/* We might not end with a whole word. */
629 	if (nbytes > 0) {
630 		word = ldphys(addr);
631 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
632 			((uchar_t *)&word)[i] = *buf++;
633 		stphys(addr, word);
634 	}
635 
636 	membar_enter();
637 	kdi_flush_caches();
638 
639 	*ncopiedp = nwritten;
640 	return (0);
641 }
642 
/*
 * Installed as the mkdi_kernpanic entry by mach_kdi_init().
 *
 * Copies the register set at 'regs' into sync_reg_buf and the trap type
 * 'tt' into sync_tt, then calls sync_handler().
 */
static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}
651 
652 static void
653 kdi_plat_call(void (*platfn)(void))
654 {
655 	if (platfn != NULL) {
656 		prom_suspend_prepost();
657 		platfn();
658 		prom_resume_prepost();
659 	}
660 }
661 
/*
 * Fill in the machine-dependent entries of the kdi_t at 'kdi': the
 * platform call wrapper and kernel panic hook defined in this file,
 * plus kdi_cpu_index and kdi_trap_vatotte, which are defined elsewhere.
 */
void
mach_kdi_init(kdi_t *kdi)
{
	kdi->kdi_plat_call = kdi_plat_call;
	kdi->mkdi_cpu_index = kdi_cpu_index;
	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
	kdi->mkdi_kernpanic = kdi_kernpanic;
}
670 
671 
672 /*
673  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
674  * long, and it fills in the array with the time spent on cpu in
675  * each of the mstates, where time is returned in nsec.
676  *
677  * No guarantee is made that the returned values in times[] will
678  * monotonically increase on sequential calls, although this will
679  * be true in the long run. Any such guarantee must be handled by
680  * the caller, if needed. This can happen if we fail to account
681  * for elapsed time due to a generation counter conflict, yet we
682  * did account for it on a prior call (see below).
683  *
684  * The complication is that the cpu in question may be updating
685  * its microstate at the same time that we are reading it.
686  * Because the microstate is only updated when the CPU's state
687  * changes, the values in cpu_intracct[] can be indefinitely out
688  * of date. To determine true current values, it is necessary to
689  * compare the current time with cpu_mstate_start, and add the
690  * difference to times[cpu_mstate].
691  *
692  * This can be a problem if those values are changing out from
693  * under us. Because the code path in new_cpu_mstate() is
694  * performance critical, we have not added a lock to it. Instead,
695  * we have added a generation counter. Before beginning
696  * modifications, the counter is set to 0. After modifications,
697  * it is set to the old value plus one.
698  *
699  * get_cpu_mstate() will not consider the values of cpu_mstate
700  * and cpu_mstate_start to be usable unless the value of
701  * cpu_mstate_gen is both non-zero and unchanged, both before and
702  * after reading the mstate information. Note that we must
703  * protect against out-of-order loads around accesses to the
704  * generation counter. Also, this is a best effort approach in
705  * that we do not retry should the counter be found to have
706  * changed.
707  *
708  * cpu_intracct[] is used to identify time spent in each CPU
709  * mstate while handling interrupts. Such time should be reported
710  * against system time, and so is subtracted out from its
711  * corresponding cpu_acct[] time and added to
712  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
713  * %ticks, but acct time may be stored as %sticks, thus requiring
714  * different conversions before they can be compared.
715  */
716 
717 void
718 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
719 {
720 	int i;
721 	hrtime_t now, start;
722 	uint16_t gen;
723 	uint16_t state;
724 	hrtime_t intracct[NCMSTATES];
725 
726 	/*
727 	 * Load all volatile state under the protection of membar.
728 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
729 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
730 	 * arrives after we make our last check of cpu_mstate_gen.
731 	 */
732 
733 	now = gethrtime_unscaled();
734 	gen = cpu->cpu_mstate_gen;
735 
736 	membar_consumer();	/* guarantee load ordering */
737 	start = cpu->cpu_mstate_start;
738 	state = cpu->cpu_mstate;
739 	for (i = 0; i < NCMSTATES; i++) {
740 		intracct[i] = cpu->cpu_intracct[i];
741 		times[i] = cpu->cpu_acct[i];
742 	}
743 	membar_consumer();	/* guarantee load ordering */
744 
745 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
746 		times[state] += now - start;
747 
748 	for (i = 0; i < NCMSTATES; i++) {
749 		scalehrtime(&times[i]);
750 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
751 	}
752 
753 	for (i = 0; i < NCMSTATES; i++) {
754 		if (i == CMS_SYSTEM)
755 			continue;
756 		times[i] -= intracct[i];
757 		if (times[i] < 0) {
758 			intracct[i] += times[i];
759 			times[i] = 0;
760 		}
761 		times[CMS_SYSTEM] += intracct[i];
762 	}
763 }
764