xref: /titanic_51/usr/src/uts/sun4/os/machdep.c (revision 898f87d827bdc9bc8d3c430cf4b38cfb8bc849ae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/kstat.h>
31 #include <sys/param.h>
32 #include <sys/stack.h>
33 #include <sys/regset.h>
34 #include <sys/thread.h>
35 #include <sys/proc.h>
36 #include <sys/procfs_isa.h>
37 #include <sys/kmem.h>
38 #include <sys/cpuvar.h>
39 #include <sys/systm.h>
40 #include <sys/machpcb.h>
41 #include <sys/machasi.h>
42 #include <sys/vis.h>
43 #include <sys/fpu/fpusystm.h>
44 #include <sys/cpu_module.h>
45 #include <sys/privregs.h>
46 #include <sys/archsystm.h>
47 #include <sys/atomic.h>
48 #include <sys/cmn_err.h>
49 #include <sys/time.h>
50 #include <sys/clock.h>
51 #include <sys/chip.h>
52 #include <sys/cmp.h>
53 #include <sys/platform_module.h>
54 #include <sys/bl.h>
55 #include <sys/nvpair.h>
56 #include <sys/kdi_impl.h>
57 #include <sys/machsystm.h>
58 #include <sys/sysmacros.h>
59 #include <sys/promif.h>
60 #include <sys/pool_pset.h>
61 
62 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
63 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
64 
65 /*
66  * Initialize kernel thread's stack.
67  */
68 caddr_t
69 thread_stk_init(caddr_t stk)
70 {
71 	kfpu_t *fp;
72 	ulong_t align;
73 
74 	/* allocate extra space for floating point state */
75 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
76 	align = (uintptr_t)stk & 0x3f;
77 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
78 	fp = (kfpu_t *)stk;
79 	fp->fpu_fprs = 0;
80 
81 	stk -= SA(MINFRAME);
82 	return (stk);
83 }
84 
85 /*
86  * Initialize lwp's kernel stack.
87  * Note that now that the floating point register save area (kfpu_t)
88  * has been broken out from machpcb and aligned on a 64 byte boundary so that
89  * we can do block load/stores to/from it, there are a couple of potential
90  * optimizations to save stack space. 1. The floating point register save
91  * area could be aligned on a 16 byte boundary, and the floating point code
92  * changed to (a) check the alignment and (b) use different save/restore
93  * macros depending upon the alignment. 2. The lwp_stk_init code below
94  * could be changed to calculate if less space would be wasted if machpcb
95  * was first instead of second. However there is a REGOFF macro used in
96  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
97  * register area is a fixed distance from the %sp, and would have to be
98  * changed to a pointer or something...JJ said later.
99  */
100 caddr_t
101 lwp_stk_init(klwp_t *lwp, caddr_t stk)
102 {
103 	struct machpcb *mpcb;
104 	kfpu_t *fp;
105 	uintptr_t aln;
106 
107 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
108 	aln = (uintptr_t)stk & 0x3F;
109 	stk -= aln;
110 	fp = (kfpu_t *)stk;
111 	stk -= SA(sizeof (struct machpcb));
112 	mpcb = (struct machpcb *)stk;
113 	bzero(mpcb, sizeof (struct machpcb));
114 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
115 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
116 	lwp->lwp_fpu = (void *)fp;
117 	mpcb->mpcb_fpu = fp;
118 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
119 	mpcb->mpcb_thread = lwp->lwp_thread;
120 	mpcb->mpcb_wbcnt = 0;
121 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
122 		mpcb->mpcb_wstate = WSTATE_USER32;
123 		mpcb->mpcb_wbuf = kmem_alloc(MAXWIN * sizeof (struct rwindow32),
124 		    KM_SLEEP);
125 	} else {
126 		mpcb->mpcb_wstate = WSTATE_USER64;
127 		mpcb->mpcb_wbuf = kmem_alloc(MAXWIN * sizeof (struct rwindow64),
128 		    KM_SLEEP);
129 	}
130 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
131 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
132 	mpcb->mpcb_pa = va_to_pa(mpcb);
133 	return (stk);
134 }
135 
136 void
137 lwp_stk_fini(klwp_t *lwp)
138 {
139 	struct machpcb *mpcb = lwptompcb(lwp);
140 
141 	/*
142 	 * there might be windows still in the wbuf due to unmapped
143 	 * stack, misaligned stack pointer, etc.  We just free it.
144 	 */
145 	mpcb->mpcb_wbcnt = 0;
146 	if (mpcb->mpcb_wstate == WSTATE_USER32)
147 		kmem_free(mpcb->mpcb_wbuf, MAXWIN * sizeof (struct rwindow32));
148 	else
149 		kmem_free(mpcb->mpcb_wbuf, MAXWIN * sizeof (struct rwindow64));
150 	mpcb->mpcb_wbuf = NULL;
151 	mpcb->mpcb_wbuf_pa = -1;
152 }
153 
154 
/*
 * Copy regs from parent to child.
 *
 * lwp is the parent and clwp the child.  The parent's register area is
 * copied over the child's, after which the child-private machpcb fields
 * (thread pointer, fpu pointer, window buffer and the physical addresses)
 * are put back.  Any windows buffered in the parent's wbuf are copied,
 * and if the parent has floating point state the GSR is carried across
 * and fp_fork() is called.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
	kthread_t *t, *pt = lwptot(lwp);
	struct machpcb *mpcb = lwptompcb(clwp);
	struct machpcb *pmpcb = lwptompcb(lwp);
	kfpu_t *fp, *pfp = lwptofpu(lwp);
	caddr_t wbuf;
	uint_t wstate;

	/* the child's thread pointer is also overwritten by the bcopy below */
	t = mpcb->mpcb_thread;
	/*
	 * remember child's fp and wbuf since they will get erased during
	 * the bcopy.
	 */
	fp = mpcb->mpcb_fpu;
	wbuf = mpcb->mpcb_wbuf;
	wstate = mpcb->mpcb_wstate;
	/*
	 * Don't copy mpcb_frame since we hand-crafted it
	 * in thread_load().
	 */
	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
	/* restore the child-private pointers saved above */
	mpcb->mpcb_thread = t;
	mpcb->mpcb_fpu = fp;
	fp->fpu_q = mpcb->mpcb_fpu_q;

	/*
	 * It is theoretically possible for the lwp's wstate to
	 * be different from its value assigned in lwp_stk_init,
	 * since lwp_stk_init assumed the data model of the process.
	 * Here, we took on the data model of the cloned lwp.
	 * If so, replace the child's window buffer with one of the
	 * size that matches the copied wstate.
	 */
	if (mpcb->mpcb_wstate != wstate) {
		size_t osize, size;

		if (wstate == WSTATE_USER32) {
			osize = MAXWIN * sizeof (struct rwindow32);
			size = MAXWIN * sizeof (struct rwindow64);
			wstate = WSTATE_USER64;
		} else {
			osize = MAXWIN * sizeof (struct rwindow64);
			size = MAXWIN * sizeof (struct rwindow32);
			wstate = WSTATE_USER32;
		}
		kmem_free(wbuf, osize);
		wbuf = kmem_alloc(size, KM_SLEEP);
	}

	/* the physical addresses copied from the parent must be recomputed */
	mpcb->mpcb_pa = va_to_pa(mpcb);
	mpcb->mpcb_wbuf = wbuf;
	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);

	ASSERT(mpcb->mpcb_wstate == wstate);

	/* carry over any windows still buffered in the parent's wbuf */
	if (mpcb->mpcb_wbcnt != 0) {
		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
	}

	/* when the parent is the running thread, refresh its saved FPRS */
	if (pt == curthread)
		pfp->fpu_fprs = _fp_read_fprs();
	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
		if (pt == curthread && fpu_exists) {
			/* save_gsr() is handed the child's fpu area */
			save_gsr(clwp->lwp_fpu);
		} else {
			/* copy the GSR from the parent's saved area */
			uint64_t gsr;
			gsr = get_gsr(lwp->lwp_fpu);
			set_gsr(gsr, clwp->lwp_fpu);
		}
		fp_fork(lwp, clwp);
	}
}
232 
233 /*
234  * Free lwp fpu regs.
235  */
236 void
237 lwp_freeregs(klwp_t *lwp, int isexec)
238 {
239 	kfpu_t *fp = lwptofpu(lwp);
240 
241 	if (lwptot(lwp) == curthread)
242 		fp->fpu_fprs = _fp_read_fprs();
243 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
244 		fp_free(fp, isexec);
245 }
246 
/*
 * Fill in the extra register state area at 'xrp' with the specified
 * lwp's platform-dependent non-floating-point extra register state
 * information.  This implementation is empty: both arguments are unused
 * and 'xrp' is left untouched.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
258 
259 /*
260  * fill in the extra register state area specified with the specified lwp's
261  * platform-dependent floating-point extra register state information.
262  * NOTE:  'lwp' might not correspond to 'curthread' since this is
263  * called from code in /proc to get the registers of another lwp.
264  */
265 void
266 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
267 {
268 	prxregset_t *xregs = (prxregset_t *)xrp;
269 	kfpu_t *fp = lwptofpu(lwp);
270 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
271 	uint64_t gsr;
272 
273 	/*
274 	 * fp_fksave() does not flush the GSR register into
275 	 * the lwp area, so do it now
276 	 */
277 	kpreempt_disable();
278 	if (ttolwp(curthread) == lwp && fpu_exists) {
279 		fp->fpu_fprs = _fp_read_fprs();
280 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
281 			_fp_write_fprs(fprs);
282 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
283 		}
284 		save_gsr(fp);
285 	}
286 	gsr = get_gsr(fp);
287 	kpreempt_enable();
288 	PRXREG_GSR(xregs) = gsr;
289 }
290 
/*
 * Set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input.  This
 * implementation is empty: both arguments are unused and no state
 * is changed.
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
301 
302 /*
303  * set the specified lwp's platform-dependent floating-point
304  * extra register state based on the specified input
305  */
306 void
307 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
308 {
309 	prxregset_t *xregs = (prxregset_t *)xrp;
310 	kfpu_t *fp = lwptofpu(lwp);
311 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
312 	uint64_t gsr = PRXREG_GSR(xregs);
313 
314 	kpreempt_disable();
315 	set_gsr(gsr, lwptofpu(lwp));
316 
317 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
318 		fp->fpu_fprs = _fp_read_fprs();
319 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
320 			_fp_write_fprs(fprs);
321 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
322 		}
323 		restore_gsr(lwptofpu(lwp));
324 	}
325 	kpreempt_enable();
326 }
327 
/*
 * Fill in the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information.  This
 * implementation is empty: both arguments are unused and 'asr' is
 * left untouched.
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
338 
339 /*
340  * fill in the sun4u asrs, ie, the lwp's platform-dependent
341  * floating-point extra register state information
342  */
343 void
344 getfpasrs(klwp_t *lwp, asrset_t asr)
345 {
346 	kfpu_t *fp = lwptofpu(lwp);
347 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
348 
349 	kpreempt_disable();
350 	if (ttolwp(curthread) == lwp)
351 		fp->fpu_fprs = _fp_read_fprs();
352 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
353 		if (fpu_exists && ttolwp(curthread) == lwp) {
354 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
355 				_fp_write_fprs(fprs);
356 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
357 			}
358 			save_gsr(fp);
359 		}
360 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
361 	}
362 	kpreempt_enable();
363 }
364 
/*
 * Set the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information.  This
 * implementation is empty: both arguments are unused and no state
 * is changed.
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
375 
376 void
377 setfpasrs(klwp_t *lwp, asrset_t asr)
378 {
379 	kfpu_t *fp = lwptofpu(lwp);
380 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
381 
382 	kpreempt_disable();
383 	if (ttolwp(curthread) == lwp)
384 		fp->fpu_fprs = _fp_read_fprs();
385 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
386 		set_gsr(asr[ASR_GSR], fp);
387 		if (fpu_exists && ttolwp(curthread) == lwp) {
388 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
389 				_fp_write_fprs(fprs);
390 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
391 			}
392 			restore_gsr(fp);
393 		}
394 	}
395 	kpreempt_enable();
396 }
397 
398 /*
399  * Create interrupt kstats for this CPU.
400  */
401 void
402 cpu_create_intrstat(cpu_t *cp)
403 {
404 	int		i;
405 	kstat_t		*intr_ksp;
406 	kstat_named_t	*knp;
407 	char		name[KSTAT_STRLEN];
408 	zoneid_t	zoneid;
409 
410 	ASSERT(MUTEX_HELD(&cpu_lock));
411 
412 	if (pool_pset_enabled())
413 		zoneid = GLOBAL_ZONEID;
414 	else
415 		zoneid = ALL_ZONES;
416 
417 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
418 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
419 
420 	/*
421 	 * Initialize each PIL's named kstat
422 	 */
423 	if (intr_ksp != NULL) {
424 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
425 		knp = (kstat_named_t *)intr_ksp->ks_data;
426 		intr_ksp->ks_private = cp;
427 		for (i = 0; i < PIL_MAX; i++) {
428 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
429 			    i + 1);
430 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
431 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
432 			    i + 1);
433 			kstat_named_init(&knp[(i * 2) + 1], name,
434 			    KSTAT_DATA_UINT64);
435 		}
436 		kstat_install(intr_ksp);
437 	}
438 }
439 
/*
 * Delete interrupt kstats for this CPU.
 *
 * Removes the "cpu"/"intrstat" kstat whose instance is cp->cpu_id, i.e.
 * the one cpu_create_intrstat() creates.  The lookup is made with
 * ALL_ZONES regardless of the zone id used at creation.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}
448 
449 /*
450  * Convert interrupt statistics from CPU ticks to nanoseconds and
451  * update kstat.
452  */
453 int
454 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
455 {
456 	kstat_named_t	*knp = ksp->ks_data;
457 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
458 	int		i;
459 
460 	if (rw == KSTAT_WRITE)
461 		return (EACCES);
462 
463 	/*
464 	 * We use separate passes to copy and convert the statistics to
465 	 * nanoseconds. This assures that the snapshot of the data is as
466 	 * self-consistent as possible.
467 	 */
468 
469 	for (i = 0; i < PIL_MAX; i++) {
470 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
471 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
472 	}
473 
474 	for (i = 0; i < PIL_MAX; i++) {
475 		knp[i * 2].value.ui64 =
476 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
477 			cpup->cpu_id);
478 	}
479 
480 	return (0);
481 }
482 
/*
 * Called by common/os/cpu.c for psrinfo(1m) kstats
 *
 * Returns the fru_fmri member of the cpunodes[] entry indexed by
 * cp->cpu_id.  The pointer is returned as stored; nothing is copied.
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}
491 
492 /*
493  * An interrupt thread is ending a time slice, so compute the interval it
494  * ran for and update the statistic for its PIL.
495  */
496 void
497 cpu_intr_swtch_enter(kthread_id_t t)
498 {
499 	uint64_t	interval;
500 	uint64_t	start;
501 	cpu_t		*cpu;
502 
503 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
504 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
505 
506 	/*
507 	 * We could be here with a zero timestamp. This could happen if:
508 	 * an interrupt thread which no longer has a pinned thread underneath
509 	 * it (i.e. it blocked at some point in its past) has finished running
510 	 * its handler. intr_thread() updated the interrupt statistic for its
511 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
512 	 * return to, swtch() gets called and we end up here.
513 	 *
514 	 * It can also happen if an interrupt thread in intr_thread() calls
515 	 * preempt. It will have already taken care of updating stats. In
516 	 * this event, the interrupt thread will be runnable.
517 	 */
518 	if (t->t_intr_start) {
519 		do {
520 			start = t->t_intr_start;
521 			interval = gettick_counter() - start;
522 		} while (cas64(&t->t_intr_start, start, 0) != start);
523 		cpu = CPU;
524 		if (cpu->cpu_m.divisor > 1)
525 			interval *= cpu->cpu_m.divisor;
526 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
527 
528 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
529 		    interval);
530 	} else
531 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
532 }
533 
534 
535 /*
536  * An interrupt thread is returning from swtch(). Place a starting timestamp
537  * in its thread structure.
538  */
539 void
540 cpu_intr_swtch_exit(kthread_id_t t)
541 {
542 	uint64_t ts;
543 
544 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
545 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
546 
547 	do {
548 		ts = t->t_intr_start;
549 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
550 }
551 
552 
553 int
554 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
555 {
556 	if (&plat_blacklist)
557 		return (plat_blacklist(cmd, scheme, fmri, class));
558 
559 	return (ENOTSUP);
560 }
561 
562 int
563 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
564 {
565 	extern void kdi_flush_caches(void);
566 	size_t nread = 0;
567 	uint32_t word;
568 	int slop, i;
569 
570 	kdi_flush_caches();
571 	membar_enter();
572 
573 	/* We might not begin on a word boundary. */
574 	if ((slop = addr & 3) != 0) {
575 		word = ldphys(addr & ~3);
576 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
577 			*buf++ = ((uchar_t *)&word)[i];
578 		addr = roundup(addr, 4);
579 	}
580 
581 	while (nbytes > 0) {
582 		word = ldphys(addr);
583 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
584 			*buf++ = ((uchar_t *)&word)[i];
585 	}
586 
587 	kdi_flush_caches();
588 
589 	*ncopiedp = nread;
590 	return (0);
591 }
592 
/*
 * Write 'nbytes' bytes from 'buf' to physical address 'addr'.
 *
 * Memory is written one aligned 32-bit word at a time with stphys().
 * A partial word at either end is handled by reading the word with
 * ldphys(), replacing only the affected bytes, and writing it back.
 * The number of bytes written is stored through 'ncopiedp'; the return
 * value is always 0.
 */
int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	extern void kdi_flush_caches(void);
	size_t nwritten = 0;
	uint32_t word;
	int slop, i;

	kdi_flush_caches();

	/* We might not begin on a word boundary. */
	if ((slop = addr & 3) != 0) {
		/* fetch the containing word and overwrite bytes slop..3 */
		word = ldphys(addr & ~3);
		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr & ~3, word);
		addr = roundup(addr, 4);
	}

	/* whole words: build each one from four source bytes */
	while (nbytes > 3) {
		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
		addr += 4;
	}

	/* We might not end with a whole word. */
	if (nbytes > 0) {
		/* keep the trailing bytes of the existing word intact */
		word = ldphys(addr);
		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
	}

	membar_enter();
	kdi_flush_caches();

	*ncopiedp = nwritten;
	return (0);
}
633 
/*
 * Copy the supplied register set into sync_reg_buf and the trap type
 * into sync_tt, then call sync_handler().  Installed as the
 * mkdi_kernpanic entry by mach_kdi_init().
 */
static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
	/* both globals are filled in before sync_handler() is entered */
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}
642 
643 static void
644 kdi_plat_call(void (*platfn)(void))
645 {
646 	if (platfn != NULL) {
647 		prom_suspend_prepost();
648 		platfn();
649 		prom_resume_prepost();
650 	}
651 }
652 
653 void
654 mach_kdi_init(kdi_t *kdi)
655 {
656 	kdi->kdi_plat_call = kdi_plat_call;
657 	kdi->mkdi_cpu_index = kdi_cpu_index;
658 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
659 	kdi->mkdi_kernpanic = kdi_kernpanic;
660 }
661 
662 
663 /*
664  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
665  * long, and it fills in the array with the time spent on cpu in
666  * each of the mstates, where time is returned in nsec.
667  *
668  * No guarantee is made that the returned values in times[] will
669  * monotonically increase on sequential calls, although this will
670  * be true in the long run. Any such guarantee must be handled by
671  * the caller, if needed. This can happen if we fail to account
672  * for elapsed time due to a generation counter conflict, yet we
673  * did account for it on a prior call (see below).
674  *
675  * The complication is that the cpu in question may be updating
676  * its microstate at the same time that we are reading it.
677  * Because the microstate is only updated when the CPU's state
678  * changes, the values in cpu_intracct[] can be indefinitely out
679  * of date. To determine true current values, it is necessary to
680  * compare the current time with cpu_mstate_start, and add the
681  * difference to times[cpu_mstate].
682  *
683  * This can be a problem if those values are changing out from
684  * under us. Because the code path in new_cpu_mstate() is
685  * performance critical, we have not added a lock to it. Instead,
686  * we have added a generation counter. Before beginning
687  * modifications, the counter is set to 0. After modifications,
688  * it is set to the old value plus one.
689  *
690  * get_cpu_mstate() will not consider the values of cpu_mstate
691  * and cpu_mstate_start to be usable unless the value of
692  * cpu_mstate_gen is both non-zero and unchanged, both before and
693  * after reading the mstate information. Note that we must
694  * protect against out-of-order loads around accesses to the
695  * generation counter. Also, this is a best effort approach in
696  * that we do not retry should the counter be found to have
697  * changed.
698  *
699  * cpu_intracct[] is used to identify time spent in each CPU
700  * mstate while handling interrupts. Such time should be reported
701  * against system time, and so is subtracted out from its
702  * corresponding cpu_acct[] time and added to
703  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
704  * %ticks, but acct time may be stored as %sticks, thus requiring
705  * different conversions before they can be compared.
706  */
707 
void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
	int i;
	hrtime_t now, start;
	uint16_t gen;
	uint16_t state;
	hrtime_t intracct[NCMSTATES];

	/*
	 * Load all volatile state under the protection of membar.
	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
	 * of (now - cpu_mstate_start) by a change in CPU mstate that
	 * arrives after we make our last check of cpu_mstate_gen.
	 */

	now = gethrtime_unscaled();
	gen = cpu->cpu_mstate_gen;

	membar_consumer();	/* guarantee load ordering */
	start = cpu->cpu_mstate_start;
	state = cpu->cpu_mstate;
	for (i = 0; i < NCMSTATES; i++) {
		intracct[i] = cpu->cpu_intracct[i];
		times[i] = cpu->cpu_acct[i];
	}
	membar_consumer();	/* guarantee load ordering */

	/*
	 * Credit the time since the last mstate change to the current
	 * state, but only if the generation counter was non-zero and
	 * did not change while we read, and time has moved forward.
	 */
	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
		times[state] += now - start;

	/*
	 * Convert both sets of values to nanoseconds; they use
	 * different conversions (scalehrtime() vs. tick2ns()).
	 */
	for (i = 0; i < NCMSTATES; i++) {
		scalehrtime(&times[i]);
		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
	}

	/*
	 * Move interrupt time out of every non-system state and into
	 * CMS_SYSTEM.  If a state has less time than its interrupt
	 * time, only what it has is moved and the state is left at 0.
	 */
	for (i = 0; i < NCMSTATES; i++) {
		if (i == CMS_SYSTEM)
			continue;
		times[i] -= intracct[i];
		if (times[i] < 0) {
			/* times[i] is negative: trim intracct[i] by the deficit */
			intracct[i] += times[i];
			times[i] = 0;
		}
		times[CMS_SYSTEM] += intracct[i];
	}
}
755