xref: /titanic_51/usr/src/uts/sun4/os/machdep.c (revision bf7c2d400a7b538aed6f356c7107284378a19fa8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/kstat.h>
30 #include <sys/param.h>
31 #include <sys/stack.h>
32 #include <sys/regset.h>
33 #include <sys/thread.h>
34 #include <sys/proc.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/kmem.h>
37 #include <sys/cpuvar.h>
38 #include <sys/systm.h>
39 #include <sys/machpcb.h>
40 #include <sys/machasi.h>
41 #include <sys/vis.h>
42 #include <sys/fpu/fpusystm.h>
43 #include <sys/cpu_module.h>
44 #include <sys/privregs.h>
45 #include <sys/archsystm.h>
46 #include <sys/atomic.h>
47 #include <sys/cmn_err.h>
48 #include <sys/time.h>
49 #include <sys/clock.h>
50 #include <sys/cmp.h>
51 #include <sys/platform_module.h>
52 #include <sys/bl.h>
53 #include <sys/nvpair.h>
54 #include <sys/kdi_impl.h>
55 #include <sys/machsystm.h>
56 #include <sys/sysmacros.h>
57 #include <sys/promif.h>
58 #include <sys/pool_pset.h>
59 #include <vm/seg_kmem.h>
60 
61 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
62 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
63 
64 /*
65  * Initialize kernel thread's stack.
66  */
67 caddr_t
68 thread_stk_init(caddr_t stk)
69 {
70 	kfpu_t *fp;
71 	ulong_t align;
72 
73 	/* allocate extra space for floating point state */
74 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
75 	align = (uintptr_t)stk & 0x3f;
76 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
77 	fp = (kfpu_t *)stk;
78 	fp->fpu_fprs = 0;
79 
80 	stk -= SA(MINFRAME);
81 	return (stk);
82 }
83 
84 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
85 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
86 
87 kmem_cache_t	*wbuf32_cache;
88 kmem_cache_t	*wbuf64_cache;
89 
90 void
91 lwp_stk_cache_init(void)
92 {
93 	/*
94 	 * Window buffers are allocated from the static arena
95 	 * because they are accessed at TL>0. We also must use
96 	 * KMC_NOHASH to prevent them from straddling page
97 	 * boundaries as they are accessed by physical address.
98 	 */
99 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
100 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
101 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
102 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
103 }
104 
105 /*
106  * Initialize lwp's kernel stack.
107  * Note that now that the floating point register save area (kfpu_t)
108  * has been broken out from machpcb and aligned on a 64 byte boundary so that
109  * we can do block load/stores to/from it, there are a couple of potential
110  * optimizations to save stack space. 1. The floating point register save
111  * area could be aligned on a 16 byte boundary, and the floating point code
112  * changed to (a) check the alignment and (b) use different save/restore
113  * macros depending upon the alignment. 2. The lwp_stk_init code below
114  * could be changed to calculate if less space would be wasted if machpcb
115  * was first instead of second. However there is a REGOFF macro used in
116  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
117  * register area is a fixed distance from the %sp, and would have to be
118  * changed to a pointer or something...JJ said later.
119  */
120 caddr_t
121 lwp_stk_init(klwp_t *lwp, caddr_t stk)
122 {
123 	struct machpcb *mpcb;
124 	kfpu_t *fp;
125 	uintptr_t aln;
126 
127 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
128 	aln = (uintptr_t)stk & 0x3F;
129 	stk -= aln;
130 	fp = (kfpu_t *)stk;
131 	stk -= SA(sizeof (struct machpcb));
132 	mpcb = (struct machpcb *)stk;
133 	bzero(mpcb, sizeof (struct machpcb));
134 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
135 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
136 	lwp->lwp_fpu = (void *)fp;
137 	mpcb->mpcb_fpu = fp;
138 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
139 	mpcb->mpcb_thread = lwp->lwp_thread;
140 	mpcb->mpcb_wbcnt = 0;
141 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
142 		mpcb->mpcb_wstate = WSTATE_USER32;
143 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
144 	} else {
145 		mpcb->mpcb_wstate = WSTATE_USER64;
146 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
147 	}
148 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
149 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
150 	mpcb->mpcb_pa = va_to_pa(mpcb);
151 	return (stk);
152 }
153 
154 void
155 lwp_stk_fini(klwp_t *lwp)
156 {
157 	struct machpcb *mpcb = lwptompcb(lwp);
158 
159 	/*
160 	 * there might be windows still in the wbuf due to unmapped
161 	 * stack, misaligned stack pointer, etc.  We just free it.
162 	 */
163 	mpcb->mpcb_wbcnt = 0;
164 	if (mpcb->mpcb_wstate == WSTATE_USER32)
165 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
166 	else
167 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
168 	mpcb->mpcb_wbuf = NULL;
169 	mpcb->mpcb_wbuf_pa = -1;
170 }
171 
172 
173 /*
174  * Copy regs from parent to child.
175  */
176 void
177 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
178 {
179 	kthread_t *t, *pt = lwptot(lwp);
180 	struct machpcb *mpcb = lwptompcb(clwp);
181 	struct machpcb *pmpcb = lwptompcb(lwp);
182 	kfpu_t *fp, *pfp = lwptofpu(lwp);
183 	caddr_t wbuf;
184 	uint_t wstate;
185 
186 	t = mpcb->mpcb_thread;
187 	/*
188 	 * remember child's fp and wbuf since they will get erased during
189 	 * the bcopy.
190 	 */
191 	fp = mpcb->mpcb_fpu;
192 	wbuf = mpcb->mpcb_wbuf;
193 	wstate = mpcb->mpcb_wstate;
194 	/*
195 	 * Don't copy mpcb_frame since we hand-crafted it
196 	 * in thread_load().
197 	 */
198 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
199 	mpcb->mpcb_thread = t;
200 	mpcb->mpcb_fpu = fp;
201 	fp->fpu_q = mpcb->mpcb_fpu_q;
202 
203 	/*
204 	 * It is theoretically possibly for the lwp's wstate to
205 	 * be different from its value assigned in lwp_stk_init,
206 	 * since lwp_stk_init assumed the data model of the process.
207 	 * Here, we took on the data model of the cloned lwp.
208 	 */
209 	if (mpcb->mpcb_wstate != wstate) {
210 		if (wstate == WSTATE_USER32) {
211 			kmem_cache_free(wbuf32_cache, wbuf);
212 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
213 			wstate = WSTATE_USER64;
214 		} else {
215 			kmem_cache_free(wbuf64_cache, wbuf);
216 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
217 			wstate = WSTATE_USER32;
218 		}
219 	}
220 
221 	mpcb->mpcb_pa = va_to_pa(mpcb);
222 	mpcb->mpcb_wbuf = wbuf;
223 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
224 
225 	ASSERT(mpcb->mpcb_wstate == wstate);
226 
227 	if (mpcb->mpcb_wbcnt != 0) {
228 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
229 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
230 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
231 	}
232 
233 	if (pt == curthread)
234 		pfp->fpu_fprs = _fp_read_fprs();
235 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
236 		if (pt == curthread && fpu_exists) {
237 			save_gsr(clwp->lwp_fpu);
238 		} else {
239 			uint64_t gsr;
240 			gsr = get_gsr(lwp->lwp_fpu);
241 			set_gsr(gsr, clwp->lwp_fpu);
242 		}
243 		fp_fork(lwp, clwp);
244 	}
245 }
246 
247 /*
248  * Free lwp fpu regs.
249  */
250 void
251 lwp_freeregs(klwp_t *lwp, int isexec)
252 {
253 	kfpu_t *fp = lwptofpu(lwp);
254 
255 	if (lwptot(lwp) == curthread)
256 		fp->fpu_fprs = _fp_read_fprs();
257 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
258 		fp_free(fp, isexec);
259 }
260 
261 /*
262  * This function is currently unused on sparc.
263  */
264 /*ARGSUSED*/
265 void
266 lwp_attach_brand_hdlrs(klwp_t *lwp)
267 {}
268 
269 /*
270  * fill in the extra register state area specified with the
271  * specified lwp's platform-dependent non-floating-point extra
272  * register state information
273  */
274 /* ARGSUSED */
275 void
276 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
277 {
278 	/* for sun4u nothing to do here, added for symmetry */
279 }
280 
281 /*
282  * fill in the extra register state area specified with the specified lwp's
283  * platform-dependent floating-point extra register state information.
284  * NOTE:  'lwp' might not correspond to 'curthread' since this is
285  * called from code in /proc to get the registers of another lwp.
286  */
287 void
288 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
289 {
290 	prxregset_t *xregs = (prxregset_t *)xrp;
291 	kfpu_t *fp = lwptofpu(lwp);
292 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
293 	uint64_t gsr;
294 
295 	/*
296 	 * fp_fksave() does not flush the GSR register into
297 	 * the lwp area, so do it now
298 	 */
299 	kpreempt_disable();
300 	if (ttolwp(curthread) == lwp && fpu_exists) {
301 		fp->fpu_fprs = _fp_read_fprs();
302 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
303 			_fp_write_fprs(fprs);
304 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
305 		}
306 		save_gsr(fp);
307 	}
308 	gsr = get_gsr(fp);
309 	kpreempt_enable();
310 	PRXREG_GSR(xregs) = gsr;
311 }
312 
313 /*
314  * set the specified lwp's platform-dependent non-floating-point
315  * extra register state based on the specified input
316  */
317 /* ARGSUSED */
318 void
319 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
320 {
321 	/* for sun4u nothing to do here, added for symmetry */
322 }
323 
324 /*
325  * set the specified lwp's platform-dependent floating-point
326  * extra register state based on the specified input
327  */
328 void
329 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
330 {
331 	prxregset_t *xregs = (prxregset_t *)xrp;
332 	kfpu_t *fp = lwptofpu(lwp);
333 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
334 	uint64_t gsr = PRXREG_GSR(xregs);
335 
336 	kpreempt_disable();
337 	set_gsr(gsr, lwptofpu(lwp));
338 
339 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
340 		fp->fpu_fprs = _fp_read_fprs();
341 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
342 			_fp_write_fprs(fprs);
343 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
344 		}
345 		restore_gsr(lwptofpu(lwp));
346 	}
347 	kpreempt_enable();
348 }
349 
350 /*
351  * fill in the sun4u asrs, ie, the lwp's platform-dependent
352  * non-floating-point extra register state information
353  */
354 /* ARGSUSED */
355 void
356 getasrs(klwp_t *lwp, asrset_t asr)
357 {
358 	/* for sun4u nothing to do here, added for symmetry */
359 }
360 
361 /*
362  * fill in the sun4u asrs, ie, the lwp's platform-dependent
363  * floating-point extra register state information
364  */
365 void
366 getfpasrs(klwp_t *lwp, asrset_t asr)
367 {
368 	kfpu_t *fp = lwptofpu(lwp);
369 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
370 
371 	kpreempt_disable();
372 	if (ttolwp(curthread) == lwp)
373 		fp->fpu_fprs = _fp_read_fprs();
374 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
375 		if (fpu_exists && ttolwp(curthread) == lwp) {
376 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
377 				_fp_write_fprs(fprs);
378 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
379 			}
380 			save_gsr(fp);
381 		}
382 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
383 	}
384 	kpreempt_enable();
385 }
386 
387 /*
388  * set the sun4u asrs, ie, the lwp's platform-dependent
389  * non-floating-point extra register state information
390  */
391 /* ARGSUSED */
392 void
393 setasrs(klwp_t *lwp, asrset_t asr)
394 {
395 	/* for sun4u nothing to do here, added for symmetry */
396 }
397 
398 void
399 setfpasrs(klwp_t *lwp, asrset_t asr)
400 {
401 	kfpu_t *fp = lwptofpu(lwp);
402 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
403 
404 	kpreempt_disable();
405 	if (ttolwp(curthread) == lwp)
406 		fp->fpu_fprs = _fp_read_fprs();
407 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
408 		set_gsr(asr[ASR_GSR], fp);
409 		if (fpu_exists && ttolwp(curthread) == lwp) {
410 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
411 				_fp_write_fprs(fprs);
412 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
413 			}
414 			restore_gsr(fp);
415 		}
416 	}
417 	kpreempt_enable();
418 }
419 
420 /*
421  * Create interrupt kstats for this CPU.
422  */
423 void
424 cpu_create_intrstat(cpu_t *cp)
425 {
426 	int		i;
427 	kstat_t		*intr_ksp;
428 	kstat_named_t	*knp;
429 	char		name[KSTAT_STRLEN];
430 	zoneid_t	zoneid;
431 
432 	ASSERT(MUTEX_HELD(&cpu_lock));
433 
434 	if (pool_pset_enabled())
435 		zoneid = GLOBAL_ZONEID;
436 	else
437 		zoneid = ALL_ZONES;
438 
439 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
440 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
441 
442 	/*
443 	 * Initialize each PIL's named kstat
444 	 */
445 	if (intr_ksp != NULL) {
446 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
447 		knp = (kstat_named_t *)intr_ksp->ks_data;
448 		intr_ksp->ks_private = cp;
449 		for (i = 0; i < PIL_MAX; i++) {
450 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
451 			    i + 1);
452 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
453 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
454 			    i + 1);
455 			kstat_named_init(&knp[(i * 2) + 1], name,
456 			    KSTAT_DATA_UINT64);
457 		}
458 		kstat_install(intr_ksp);
459 	}
460 }
461 
462 /*
463  * Delete interrupt kstats for this CPU.
464  */
465 void
466 cpu_delete_intrstat(cpu_t *cp)
467 {
468 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
469 }
470 
471 /*
472  * Convert interrupt statistics from CPU ticks to nanoseconds and
473  * update kstat.
474  */
475 int
476 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
477 {
478 	kstat_named_t	*knp = ksp->ks_data;
479 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
480 	int		i;
481 
482 	if (rw == KSTAT_WRITE)
483 		return (EACCES);
484 
485 	/*
486 	 * We use separate passes to copy and convert the statistics to
487 	 * nanoseconds. This assures that the snapshot of the data is as
488 	 * self-consistent as possible.
489 	 */
490 
491 	for (i = 0; i < PIL_MAX; i++) {
492 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
493 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
494 	}
495 
496 	for (i = 0; i < PIL_MAX; i++) {
497 		knp[i * 2].value.ui64 =
498 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
499 			cpup->cpu_id);
500 	}
501 
502 	return (0);
503 }
504 
505 /*
506  * Called by common/os/cpu.c for psrinfo(1m) kstats
507  */
508 char *
509 cpu_fru_fmri(cpu_t *cp)
510 {
511 	return (cpunodes[cp->cpu_id].fru_fmri);
512 }
513 
514 /*
515  * An interrupt thread is ending a time slice, so compute the interval it
516  * ran for and update the statistic for its PIL.
517  */
518 void
519 cpu_intr_swtch_enter(kthread_id_t t)
520 {
521 	uint64_t	interval;
522 	uint64_t	start;
523 	cpu_t		*cpu;
524 
525 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
526 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
527 
528 	/*
529 	 * We could be here with a zero timestamp. This could happen if:
530 	 * an interrupt thread which no longer has a pinned thread underneath
531 	 * it (i.e. it blocked at some point in its past) has finished running
532 	 * its handler. intr_thread() updated the interrupt statistic for its
533 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
534 	 * return to, swtch() gets called and we end up here.
535 	 *
536 	 * It can also happen if an interrupt thread in intr_thread() calls
537 	 * preempt. It will have already taken care of updating stats. In
538 	 * this event, the interrupt thread will be runnable.
539 	 */
540 	if (t->t_intr_start) {
541 		do {
542 			start = t->t_intr_start;
543 			interval = gettick_counter() - start;
544 		} while (cas64(&t->t_intr_start, start, 0) != start);
545 		cpu = CPU;
546 		if (cpu->cpu_m.divisor > 1)
547 			interval *= cpu->cpu_m.divisor;
548 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
549 
550 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
551 		    interval);
552 	} else
553 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
554 }
555 
556 
557 /*
558  * An interrupt thread is returning from swtch(). Place a starting timestamp
559  * in its thread structure.
560  */
561 void
562 cpu_intr_swtch_exit(kthread_id_t t)
563 {
564 	uint64_t ts;
565 
566 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
567 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
568 
569 	do {
570 		ts = t->t_intr_start;
571 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
572 }
573 
574 
575 int
576 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
577 {
578 	if (&plat_blacklist)
579 		return (plat_blacklist(cmd, scheme, fmri, class));
580 
581 	return (ENOTSUP);
582 }
583 
584 int
585 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
586 {
587 	extern void kdi_flush_caches(void);
588 	size_t nread = 0;
589 	uint32_t word;
590 	int slop, i;
591 
592 	kdi_flush_caches();
593 	membar_enter();
594 
595 	/* We might not begin on a word boundary. */
596 	if ((slop = addr & 3) != 0) {
597 		word = ldphys(addr & ~3);
598 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
599 			*buf++ = ((uchar_t *)&word)[i];
600 		addr = roundup(addr, 4);
601 	}
602 
603 	while (nbytes > 0) {
604 		word = ldphys(addr);
605 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
606 			*buf++ = ((uchar_t *)&word)[i];
607 	}
608 
609 	kdi_flush_caches();
610 
611 	*ncopiedp = nread;
612 	return (0);
613 }
614 
615 int
616 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
617 {
618 	extern void kdi_flush_caches(void);
619 	size_t nwritten = 0;
620 	uint32_t word;
621 	int slop, i;
622 
623 	kdi_flush_caches();
624 
625 	/* We might not begin on a word boundary. */
626 	if ((slop = addr & 3) != 0) {
627 		word = ldphys(addr & ~3);
628 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
629 			((uchar_t *)&word)[i] = *buf++;
630 		stphys(addr & ~3, word);
631 		addr = roundup(addr, 4);
632 	}
633 
634 	while (nbytes > 3) {
635 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
636 			((uchar_t *)&word)[i] = *buf++;
637 		stphys(addr, word);
638 		addr += 4;
639 	}
640 
641 	/* We might not end with a whole word. */
642 	if (nbytes > 0) {
643 		word = ldphys(addr);
644 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
645 			((uchar_t *)&word)[i] = *buf++;
646 		stphys(addr, word);
647 	}
648 
649 	membar_enter();
650 	kdi_flush_caches();
651 
652 	*ncopiedp = nwritten;
653 	return (0);
654 }
655 
656 static void
657 kdi_kernpanic(struct regs *regs, uint_t tt)
658 {
659 	sync_reg_buf = *regs;
660 	sync_tt = tt;
661 
662 	sync_handler();
663 }
664 
/*
 * Invoke a platform function, bracketed by prom_suspend_prepost() and
 * prom_resume_prepost().  A NULL function pointer is silently ignored.
 */
static void
kdi_plat_call(void (*platfn)(void))
{
	if (platfn == NULL)
		return;

	prom_suspend_prepost();
	(*platfn)();
	prom_resume_prepost();
}
674 
675 void
676 mach_kdi_init(kdi_t *kdi)
677 {
678 	kdi->kdi_plat_call = kdi_plat_call;
679 	kdi->mkdi_cpu_index = kdi_cpu_index;
680 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
681 	kdi->mkdi_kernpanic = kdi_kernpanic;
682 }
683 
684 
685 /*
686  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
687  * long, and it fills in the array with the time spent on cpu in
688  * each of the mstates, where time is returned in nsec.
689  *
690  * No guarantee is made that the returned values in times[] will
691  * monotonically increase on sequential calls, although this will
692  * be true in the long run. Any such guarantee must be handled by
693  * the caller, if needed. This can happen if we fail to account
694  * for elapsed time due to a generation counter conflict, yet we
695  * did account for it on a prior call (see below).
696  *
697  * The complication is that the cpu in question may be updating
698  * its microstate at the same time that we are reading it.
699  * Because the microstate is only updated when the CPU's state
700  * changes, the values in cpu_intracct[] can be indefinitely out
701  * of date. To determine true current values, it is necessary to
702  * compare the current time with cpu_mstate_start, and add the
703  * difference to times[cpu_mstate].
704  *
705  * This can be a problem if those values are changing out from
706  * under us. Because the code path in new_cpu_mstate() is
707  * performance critical, we have not added a lock to it. Instead,
708  * we have added a generation counter. Before beginning
709  * modifications, the counter is set to 0. After modifications,
710  * it is set to the old value plus one.
711  *
712  * get_cpu_mstate() will not consider the values of cpu_mstate
713  * and cpu_mstate_start to be usable unless the value of
714  * cpu_mstate_gen is both non-zero and unchanged, both before and
715  * after reading the mstate information. Note that we must
716  * protect against out-of-order loads around accesses to the
717  * generation counter. Also, this is a best effort approach in
718  * that we do not retry should the counter be found to have
719  * changed.
720  *
721  * cpu_intracct[] is used to identify time spent in each CPU
722  * mstate while handling interrupts. Such time should be reported
723  * against system time, and so is subtracted out from its
724  * corresponding cpu_acct[] time and added to
725  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
726  * %ticks, but acct time may be stored as %sticks, thus requiring
727  * different conversions before they can be compared.
728  */
729 
730 void
731 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
732 {
733 	int i;
734 	hrtime_t now, start;
735 	uint16_t gen;
736 	uint16_t state;
737 	hrtime_t intracct[NCMSTATES];
738 
739 	/*
740 	 * Load all volatile state under the protection of membar.
741 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
742 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
743 	 * arrives after we make our last check of cpu_mstate_gen.
744 	 */
745 
746 	now = gethrtime_unscaled();
747 	gen = cpu->cpu_mstate_gen;
748 
749 	membar_consumer();	/* guarantee load ordering */
750 	start = cpu->cpu_mstate_start;
751 	state = cpu->cpu_mstate;
752 	for (i = 0; i < NCMSTATES; i++) {
753 		intracct[i] = cpu->cpu_intracct[i];
754 		times[i] = cpu->cpu_acct[i];
755 	}
756 	membar_consumer();	/* guarantee load ordering */
757 
758 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
759 		times[state] += now - start;
760 
761 	for (i = 0; i < NCMSTATES; i++) {
762 		scalehrtime(&times[i]);
763 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
764 	}
765 
766 	for (i = 0; i < NCMSTATES; i++) {
767 		if (i == CMS_SYSTEM)
768 			continue;
769 		times[i] -= intracct[i];
770 		if (times[i] < 0) {
771 			intracct[i] += times[i];
772 			times[i] = 0;
773 		}
774 		times[CMS_SYSTEM] += intracct[i];
775 	}
776 }
777