xref: /titanic_51/usr/src/uts/sun4/os/machdep.c (revision ccbf80fa3b6bf6b986dca9037e5ad9d6c9f9fa65)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/kstat.h>
#include <sys/param.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/procfs_isa.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/machpcb.h>
#include <sys/machasi.h>
#include <sys/vis.h>
#include <sys/fpu/fpusystm.h>
#include <sys/cpu_module.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/chip.h>
#include <sys/cmp.h>
#include <sys/platform_module.h>
#include <sys/bl.h>
#include <sys/nvpair.h>
#include <sys/kdi_impl.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/pool_pset.h>
#include <vm/seg_kmem.h>

int maxphys = MMU_PAGESIZE * 16;	/* 128k */
int klustsize = MMU_PAGESIZE * 16;	/* 128k */

/*
 * Initialize kernel thread's stack.
 */
caddr_t
thread_stk_init(caddr_t stk)
{
	kfpu_t *fp;
	ulong_t align;

	/* allocate extra space for floating point state */
	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
	align = (uintptr_t)stk & 0x3f;
	stk -= align;		/* force v9_fpu to be 64 byte aligned */
	fp = (kfpu_t *)stk;
	fp->fpu_fprs = 0;

	stk -= SA(MINFRAME);
	return (stk);
}

#define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
#define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))

kmem_cache_t	*wbuf32_cache;
kmem_cache_t	*wbuf64_cache;

void
lwp_stk_cache_init(void)
{
	/*
	 * Window buffers are allocated from the static arena
	 * because they are accessed at TL>0. We also must use
	 * KMC_NOHASH to prevent them from straddling page
	 * boundaries as they are accessed by physical address.
	 */
	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
}

/*
 * Initialize lwp's kernel stack.
 * Now that the floating point register save area (kfpu_t) has been broken
 * out of machpcb and aligned on a 64 byte boundary so that we can do block
 * loads/stores to/from it, there are a couple of potential optimizations to
 * save stack space. 1. The floating point register save area could be
 * aligned on a 16 byte boundary, and the floating point code changed to
 * (a) check the alignment and (b) use different save/restore macros
 * depending upon the alignment. 2. The lwp_stk_init code below could be
 * changed to calculate whether less space would be wasted if machpcb came
 * first instead of second. However, there is a REGOFF macro used in locore,
 * syscall_trap, machdep and mlsetup that assumes that the saved register
 * area is a fixed distance from the %sp, and it would have to be changed
 * to a pointer or something...JJ said later.
 */
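/*
 * Roughly, lwp_stk_init() below lays out the top of the lwp's kernel stack
 * as follows (stack grows toward lower addresses; this is a sketch of the
 * code below, not an additional interface contract):
 *
 *	+---------------------------------------+ <- stk passed in
 *	| alignment/rounding slack               |
 *	+---------------------------------------+
 *	| kfpu_t + GSR save area                 |
 *	+---------------------------------------+ <- fp (64-byte aligned)
 *	| struct machpcb (SA-rounded)            |
 *	+---------------------------------------+ <- mpcb == returned stk
 */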
caddr_t
lwp_stk_init(klwp_t *lwp, caddr_t stk)
{
	struct machpcb *mpcb;
	kfpu_t *fp;
	uintptr_t aln;

	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
	aln = (uintptr_t)stk & 0x3F;
	stk -= aln;
	fp = (kfpu_t *)stk;
	stk -= SA(sizeof (struct machpcb));
	mpcb = (struct machpcb *)stk;
	bzero(mpcb, sizeof (struct machpcb));
	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
	lwp->lwp_fpu = (void *)fp;
	mpcb->mpcb_fpu = fp;
	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
	mpcb->mpcb_thread = lwp->lwp_thread;
	mpcb->mpcb_wbcnt = 0;
	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
		mpcb->mpcb_wstate = WSTATE_USER32;
		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
	} else {
		mpcb->mpcb_wstate = WSTATE_USER64;
		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
	}
	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
	mpcb->mpcb_pa = va_to_pa(mpcb);
	return (stk);
}

void
lwp_stk_fini(klwp_t *lwp)
{
	struct machpcb *mpcb = lwptompcb(lwp);

	/*
	 * there might still be windows in the wbuf due to an unmapped
	 * stack, a misaligned stack pointer, etc.  We just free the buffer.
	 */
	mpcb->mpcb_wbcnt = 0;
	if (mpcb->mpcb_wstate == WSTATE_USER32)
		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
	else
		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
	mpcb->mpcb_wbuf = NULL;
	mpcb->mpcb_wbuf_pa = -1;
}


/*
 * Copy regs from parent to child.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
	kthread_t *t, *pt = lwptot(lwp);
	struct machpcb *mpcb = lwptompcb(clwp);
	struct machpcb *pmpcb = lwptompcb(lwp);
	kfpu_t *fp, *pfp = lwptofpu(lwp);
	caddr_t wbuf;
	uint_t wstate;

	t = mpcb->mpcb_thread;
	/*
	 * remember the child's fp and wbuf since they will be overwritten
	 * by the bcopy below.
	 */
	fp = mpcb->mpcb_fpu;
	wbuf = mpcb->mpcb_wbuf;
	wstate = mpcb->mpcb_wstate;
	/*
	 * Don't copy mpcb_frame since we hand-crafted it
	 * in thread_load().
	 */
	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
	mpcb->mpcb_thread = t;
	mpcb->mpcb_fpu = fp;
	fp->fpu_q = mpcb->mpcb_fpu_q;

	/*
	 * It is theoretically possible for the lwp's wstate to differ from
	 * the value assigned in lwp_stk_init, since lwp_stk_init assumed
	 * the data model of the process. Here we take on the data model of
	 * the cloned lwp.
	 */
	if (mpcb->mpcb_wstate != wstate) {
		if (wstate == WSTATE_USER32) {
			kmem_cache_free(wbuf32_cache, wbuf);
			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
			wstate = WSTATE_USER64;
		} else {
			kmem_cache_free(wbuf64_cache, wbuf);
			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
			wstate = WSTATE_USER32;
		}
	}

	mpcb->mpcb_pa = va_to_pa(mpcb);
	mpcb->mpcb_wbuf = wbuf;
	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);

	ASSERT(mpcb->mpcb_wstate == wstate);

	if (mpcb->mpcb_wbcnt != 0) {
		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
	}

	if (pt == curthread)
		pfp->fpu_fprs = _fp_read_fprs();
	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
		if (pt == curthread && fpu_exists) {
			save_gsr(clwp->lwp_fpu);
		} else {
			uint64_t gsr;
			gsr = get_gsr(lwp->lwp_fpu);
			set_gsr(gsr, clwp->lwp_fpu);
		}
		fp_fork(lwp, clwp);
	}
}

/*
 * Free lwp fpu regs.
 */
void
lwp_freeregs(klwp_t *lwp, int isexec)
{
	kfpu_t *fp = lwptofpu(lwp);

	if (lwptot(lwp) == curthread)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
		fp_free(fp, isexec);
}

/*
 * This function is currently unused on sparc.
 */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}

/*
 * fill in the specified extra register state area with the specified lwp's
 * platform-dependent non-floating-point extra register state information
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * fill in the specified extra register state area with the specified lwp's
 * platform-dependent floating-point extra register state information.
 * NOTE:  'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr;

	/*
	 * fp_fksave() does not flush the GSR register into
	 * the lwp area, so do it now.
	 */
	kpreempt_disable();
	if (ttolwp(curthread) == lwp && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
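		/*
		 * Make sure the FPU is enabled (FPRS_FEF set) before
		 * save_gsr() reads %gsr below; accessing %gsr with the
		 * FPU disabled would trap.
		 */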
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		save_gsr(fp);
	}
	gsr = get_gsr(fp);
	kpreempt_enable();
	PRXREG_GSR(xregs) = gsr;
}

/*
 * set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * set the specified lwp's platform-dependent floating-point
 * extra register state based on the specified input
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr = PRXREG_GSR(xregs);

	kpreempt_disable();
	set_gsr(gsr, lwptofpu(lwp));

	if ((lwp == ttolwp(curthread)) && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		restore_gsr(lwptofpu(lwp));
	}
	kpreempt_enable();
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * floating-point extra register state information
 */
void
getfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();
	if (ttolwp(curthread) == lwp)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		if (fpu_exists && ttolwp(curthread) == lwp) {
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				_fp_write_fprs(fprs);
				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
			}
			save_gsr(fp);
		}
		asr[ASR_GSR] = (int64_t)get_gsr(fp);
	}
	kpreempt_enable();
}

/*
 * set the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}

void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();
	if (ttolwp(curthread) == lwp)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		set_gsr(asr[ASR_GSR], fp);
		if (fpu_exists && ttolwp(curthread) == lwp) {
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				_fp_write_fprs(fprs);
				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
			}
			restore_gsr(fp);
		}
	}
	kpreempt_enable();
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
	int		i;
	kstat_t		*intr_ksp;
	kstat_named_t	*knp;
	char		name[KSTAT_STRLEN];
	zoneid_t	zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;

	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

	/*
	 * Initialize each PIL's named kstat
	 */
	if (intr_ksp != NULL) {
		intr_ksp->ks_update = cpu_kstat_intrstat_update;
		knp = (kstat_named_t *)intr_ksp->ks_data;
		intr_ksp->ks_private = cp;
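		/*
		 * The named kstats are laid out pairwise: knp[2 * i] is the
		 * cumulative time and knp[2 * i + 1] the count for PIL i + 1.
		 * cpu_kstat_intrstat_update() relies on this layout.
		 */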
		for (i = 0; i < PIL_MAX; i++) {
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
			    i + 1);
			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
			    i + 1);
			kstat_named_init(&knp[(i * 2) + 1], name,
			    KSTAT_DATA_UINT64);
		}
		kstat_install(intr_ksp);
	}
}

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
	kstat_named_t	*knp = ksp->ks_data;
	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
	int		i;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * We use separate passes to copy and convert the statistics to
	 * nanoseconds. This ensures that the snapshot of the data is as
	 * self-consistent as possible.
	 */

	for (i = 0; i < PIL_MAX; i++) {
		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
	}

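	/*
	 * Second pass: convert the interrupt times captured above from
	 * %tick units to nanoseconds.
	 */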
	for (i = 0; i < PIL_MAX; i++) {
		knp[i * 2].value.ui64 =
		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
			cpup->cpu_id);
	}

	return (0);
}

/*
 * Called by common/os/cpu.c for psrinfo(1m) kstats
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
	uint64_t	interval;
	uint64_t	start;
	cpu_t		*cpu;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	/*
	 * We could be here with a zero timestamp. This can happen if an
	 * interrupt thread which no longer has a pinned thread underneath
	 * it (i.e. it blocked at some point in its past) has finished
	 * running its handler. intr_thread() updated the interrupt
	 * statistic for its PIL and zeroed its timestamp. Since there was
	 * no pinned thread to return to, swtch() gets called and we end
	 * up here.
	 *
	 * It can also happen if an interrupt thread in intr_thread() calls
	 * preempt(). It will have already taken care of updating stats; in
	 * this event, the interrupt thread will be runnable.
	 */
	if (t->t_intr_start) {
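		/*
		 * Atomically capture the start timestamp and reset it to
		 * zero, retrying if t_intr_start changes underneath us.
		 */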
		do {
			start = t->t_intr_start;
			interval = gettick_counter() - start;
		} while (cas64(&t->t_intr_start, start, 0) != start);
		cpu = CPU;
		if (cpu->cpu_m.divisor > 1)
			interval *= cpu->cpu_m.divisor;
		cpu->cpu_m.intrstat[t->t_pil][0] += interval;

		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
		    interval);
	} else
		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
}


/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
	uint64_t ts;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

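	/*
	 * Atomically install a fresh %tick timestamp in t_intr_start,
	 * retrying if it changes underneath us.
	 */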
	do {
		ts = t->t_intr_start;
	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
}


int
blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
{
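	/*
	 * plat_blacklist() is an optional platform-specific hook; the
	 * address test below checks whether the platform module actually
	 * provides an implementation.
	 */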
	if (&plat_blacklist)
		return (plat_blacklist(cmd, scheme, fmri, class));

	return (ENOTSUP);
}

int
kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	extern void kdi_flush_caches(void);
	size_t nread = 0;
	uint32_t word;
	int slop, i;

	kdi_flush_caches();
	membar_enter();

	/* We might not begin on a word boundary. */
	if ((slop = addr & 3) != 0) {
		word = ldphys(addr & ~3);
		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
			*buf++ = ((uchar_t *)&word)[i];
		addr = roundup(addr, 4);
	}

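	/* Copy whole words; the final word may be partial. */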
	while (nbytes > 0) {
		word = ldphys(addr);
		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
			*buf++ = ((uchar_t *)&word)[i];
	}

	kdi_flush_caches();

	*ncopiedp = nread;
	return (0);
}

int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	extern void kdi_flush_caches(void);
	size_t nwritten = 0;
	uint32_t word;
	int slop, i;

	kdi_flush_caches();

	/* We might not begin on a word boundary. */
	if ((slop = addr & 3) != 0) {
		word = ldphys(addr & ~3);
		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr & ~3, word);
		addr = roundup(addr, 4);
	}

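	/* Copy whole aligned words. */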
	while (nbytes > 3) {
		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
		addr += 4;
	}

	/* We might not end with a whole word. */
	if (nbytes > 0) {
		word = ldphys(addr);
		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
	}

	membar_enter();
	kdi_flush_caches();

	*ncopiedp = nwritten;
	return (0);
}

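/*
 * Panic the kernel on behalf of the debugger: stash the debugger-supplied
 * register state and trap type where sync_handler() expects them and then
 * invoke it.
 */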
static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}

static void
kdi_plat_call(void (*platfn)(void))
{
	if (platfn != NULL) {
		prom_suspend_prepost();
		platfn();
		prom_resume_prepost();
	}
}

void
mach_kdi_init(kdi_t *kdi)
{
	kdi->kdi_plat_call = kdi_plat_call;
	kdi->mkdi_cpu_index = kdi_cpu_index;
	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
	kdi->mkdi_kernpanic = kdi_kernpanic;
}


/*
 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 * long, and it fills in the array with the time spent on cpu in
 * each of the mstates, where time is returned in nsec.
 *
 * No guarantee is made that the returned values in times[] will
 * monotonically increase on sequential calls, although this will
 * be true in the long run. Any such guarantee must be handled by
 * the caller, if needed. This can happen if we fail to account
 * for elapsed time due to a generation counter conflict, yet we
 * did account for it on a prior call (see below).
 *
 * The complication is that the cpu in question may be updating
 * its microstate at the same time that we are reading it.
 * Because the microstate is only updated when the CPU's state
 * changes, the values in cpu_intracct[] can be indefinitely out
 * of date. To determine true current values, it is necessary to
 * compare the current time with cpu_mstate_start, and add the
 * difference to times[cpu_mstate].
 *
 * This can be a problem if those values are changing out from
 * under us. Because the code path in new_cpu_mstate() is
 * performance critical, we have not added a lock to it. Instead,
 * we have added a generation counter. Before beginning
 * modifications, the counter is set to 0. After modifications,
 * it is set to the old value plus one.
 *
 * get_cpu_mstate() will not consider the values of cpu_mstate
 * and cpu_mstate_start to be usable unless the value of
 * cpu_mstate_gen is both non-zero and unchanged, both before and
 * after reading the mstate information. Note that we must
 * protect against out-of-order loads around accesses to the
 * generation counter. Also, this is a best effort approach in
 * that we do not retry should the counter be found to have
 * changed.
 *
 * cpu_intracct[] is used to identify time spent in each CPU
 * mstate while handling interrupts. Such time should be reported
 * against system time, and so is subtracted out from its
 * corresponding cpu_acct[] time and added to
 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 * %ticks, but acct time may be stored as %sticks, thus requiring
 * different conversions before they can be compared.
 */

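/*
 * For reference, a minimal sketch of the writer-side protocol described
 * above (illustrative only; the actual new_cpu_mstate() lives elsewhere,
 * and the membar_producer() calls here are assumed to pair with the
 * membar_consumer() calls in the reader below):
 *
 *	gen = cpu->cpu_mstate_gen;
 *	cpu->cpu_mstate_gen = 0;
 *	membar_producer();	// updates become visible only after gen = 0
 *	... update cpu_mstate, cpu_mstate_start, cpu_acct[], cpu_intracct[] ...
 *	membar_producer();	// new gen becomes visible after the updates
 *	cpu->cpu_mstate_gen = (++gen == 0) ? 1 : gen;	// stays non-zero
 */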
void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
	int i;
	hrtime_t now, start;
	uint16_t gen;
	uint16_t state;
	hrtime_t intracct[NCMSTATES];

	/*
	 * Load all volatile state under the protection of membar.
	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
	 * of (now - cpu_mstate_start) by a change in CPU mstate that
	 * arrives after we make our last check of cpu_mstate_gen.
	 */

	now = gethrtime_unscaled();
	gen = cpu->cpu_mstate_gen;

	membar_consumer();	/* guarantee load ordering */
	start = cpu->cpu_mstate_start;
	state = cpu->cpu_mstate;
	for (i = 0; i < NCMSTATES; i++) {
		intracct[i] = cpu->cpu_intracct[i];
		times[i] = cpu->cpu_acct[i];
	}
	membar_consumer();	/* guarantee load ordering */

	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
		times[state] += now - start;

	for (i = 0; i < NCMSTATES; i++) {
		scalehrtime(&times[i]);
		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
	}

	for (i = 0; i < NCMSTATES; i++) {
		if (i == CMS_SYSTEM)
			continue;
		times[i] -= intracct[i];
		if (times[i] < 0) {
			intracct[i] += times[i];
			times[i] = 0;
		}
		times[CMS_SYSTEM] += intracct[i];
	}
}
778