xref: /titanic_52/usr/src/uts/sun4/os/machdep.c (revision b0a27161a17f3895896e339e1786b15a1dcee3b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/kstat.h>
30 #include <sys/param.h>
31 #include <sys/stack.h>
32 #include <sys/regset.h>
33 #include <sys/thread.h>
34 #include <sys/proc.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/kmem.h>
37 #include <sys/cpuvar.h>
38 #include <sys/systm.h>
39 #include <sys/machpcb.h>
40 #include <sys/machasi.h>
41 #include <sys/vis.h>
42 #include <sys/fpu/fpusystm.h>
43 #include <sys/cpu_module.h>
44 #include <sys/privregs.h>
45 #include <sys/archsystm.h>
46 #include <sys/atomic.h>
47 #include <sys/cmn_err.h>
48 #include <sys/time.h>
49 #include <sys/clock.h>
50 #include <sys/cmp.h>
51 #include <sys/platform_module.h>
52 #include <sys/bl.h>
53 #include <sys/nvpair.h>
54 #include <sys/kdi_impl.h>
55 #include <sys/machsystm.h>
56 #include <sys/sysmacros.h>
57 #include <sys/promif.h>
58 #include <sys/pool_pset.h>
59 #include <sys/mem.h>
60 #include <sys/dumphdr.h>
61 #include <vm/seg_kmem.h>
62 #include <sys/hold_page.h>
63 #include <sys/cpu.h>
64 
65 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
66 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
67 
68 /*
69  * Initialize kernel thread's stack.
70  */
71 caddr_t
72 thread_stk_init(caddr_t stk)
73 {
74 	kfpu_t *fp;
75 	ulong_t align;
76 
77 	/* allocate extra space for floating point state */
78 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
79 	align = (uintptr_t)stk & 0x3f;
80 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
81 	fp = (kfpu_t *)stk;
82 	fp->fpu_fprs = 0;
83 
84 	stk -= SA(MINFRAME);
85 	return (stk);
86 }
87 
88 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
89 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
90 
91 kmem_cache_t	*wbuf32_cache;
92 kmem_cache_t	*wbuf64_cache;
93 
94 void
95 lwp_stk_cache_init(void)
96 {
97 	/*
98 	 * Window buffers are allocated from the static arena
99 	 * because they are accessed at TL>0. We also must use
100 	 * KMC_NOHASH to prevent them from straddling page
101 	 * boundaries as they are accessed by physical address.
102 	 */
103 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
104 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
105 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
106 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
107 }
108 
109 /*
110  * Initialize lwp's kernel stack.
111  * Note that now that the floating point register save area (kfpu_t)
112  * has been broken out from machpcb and aligned on a 64 byte boundary so that
113  * we can do block load/stores to/from it, there are a couple of potential
114  * optimizations to save stack space. 1. The floating point register save
115  * area could be aligned on a 16 byte boundary, and the floating point code
116  * changed to (a) check the alignment and (b) use different save/restore
117  * macros depending upon the alignment. 2. The lwp_stk_init code below
118  * could be changed to calculate if less space would be wasted if machpcb
119  * was first instead of second. However there is a REGOFF macro used in
120  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
121  * register area is a fixed distance from the %sp, and would have to be
122  * changed to a pointer or something...JJ said later.
123  */
124 caddr_t
125 lwp_stk_init(klwp_t *lwp, caddr_t stk)
126 {
127 	struct machpcb *mpcb;
128 	kfpu_t *fp;
129 	uintptr_t aln;
130 
131 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
132 	aln = (uintptr_t)stk & 0x3F;
133 	stk -= aln;
134 	fp = (kfpu_t *)stk;
135 	stk -= SA(sizeof (struct machpcb));
136 	mpcb = (struct machpcb *)stk;
137 	bzero(mpcb, sizeof (struct machpcb));
138 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
139 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
140 	lwp->lwp_fpu = (void *)fp;
141 	mpcb->mpcb_fpu = fp;
142 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
143 	mpcb->mpcb_thread = lwp->lwp_thread;
144 	mpcb->mpcb_wbcnt = 0;
145 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
146 		mpcb->mpcb_wstate = WSTATE_USER32;
147 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
148 	} else {
149 		mpcb->mpcb_wstate = WSTATE_USER64;
150 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
151 	}
152 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
153 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
154 	mpcb->mpcb_pa = va_to_pa(mpcb);
155 	return (stk);
156 }
157 
158 void
159 lwp_stk_fini(klwp_t *lwp)
160 {
161 	struct machpcb *mpcb = lwptompcb(lwp);
162 
163 	/*
164 	 * there might be windows still in the wbuf due to unmapped
165 	 * stack, misaligned stack pointer, etc.  We just free it.
166 	 */
167 	mpcb->mpcb_wbcnt = 0;
168 	if (mpcb->mpcb_wstate == WSTATE_USER32)
169 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
170 	else
171 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
172 	mpcb->mpcb_wbuf = NULL;
173 	mpcb->mpcb_wbuf_pa = -1;
174 }
175 
176 
177 /*
178  * Copy regs from parent to child.
179  */
180 void
181 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
182 {
183 	kthread_t *t, *pt = lwptot(lwp);
184 	struct machpcb *mpcb = lwptompcb(clwp);
185 	struct machpcb *pmpcb = lwptompcb(lwp);
186 	kfpu_t *fp, *pfp = lwptofpu(lwp);
187 	caddr_t wbuf;
188 	uint_t wstate;
189 
190 	t = mpcb->mpcb_thread;
191 	/*
192 	 * remember child's fp and wbuf since they will get erased during
193 	 * the bcopy.
194 	 */
195 	fp = mpcb->mpcb_fpu;
196 	wbuf = mpcb->mpcb_wbuf;
197 	wstate = mpcb->mpcb_wstate;
198 	/*
199 	 * Don't copy mpcb_frame since we hand-crafted it
200 	 * in thread_load().
201 	 */
202 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
203 	mpcb->mpcb_thread = t;
204 	mpcb->mpcb_fpu = fp;
205 	fp->fpu_q = mpcb->mpcb_fpu_q;
206 
207 	/*
208 	 * It is theoretically possibly for the lwp's wstate to
209 	 * be different from its value assigned in lwp_stk_init,
210 	 * since lwp_stk_init assumed the data model of the process.
211 	 * Here, we took on the data model of the cloned lwp.
212 	 */
213 	if (mpcb->mpcb_wstate != wstate) {
214 		if (wstate == WSTATE_USER32) {
215 			kmem_cache_free(wbuf32_cache, wbuf);
216 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
217 			wstate = WSTATE_USER64;
218 		} else {
219 			kmem_cache_free(wbuf64_cache, wbuf);
220 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
221 			wstate = WSTATE_USER32;
222 		}
223 	}
224 
225 	mpcb->mpcb_pa = va_to_pa(mpcb);
226 	mpcb->mpcb_wbuf = wbuf;
227 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
228 
229 	ASSERT(mpcb->mpcb_wstate == wstate);
230 
231 	if (mpcb->mpcb_wbcnt != 0) {
232 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
233 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
234 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
235 	}
236 
237 	if (pt == curthread)
238 		pfp->fpu_fprs = _fp_read_fprs();
239 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
240 		if (pt == curthread && fpu_exists) {
241 			save_gsr(clwp->lwp_fpu);
242 		} else {
243 			uint64_t gsr;
244 			gsr = get_gsr(lwp->lwp_fpu);
245 			set_gsr(gsr, clwp->lwp_fpu);
246 		}
247 		fp_fork(lwp, clwp);
248 	}
249 }
250 
251 /*
252  * Free lwp fpu regs.
253  */
254 void
255 lwp_freeregs(klwp_t *lwp, int isexec)
256 {
257 	kfpu_t *fp = lwptofpu(lwp);
258 
259 	if (lwptot(lwp) == curthread)
260 		fp->fpu_fprs = _fp_read_fprs();
261 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
262 		fp_free(fp, isexec);
263 }
264 
265 /*
266  * These function are currently unused on sparc.
267  */
268 /*ARGSUSED*/
269 void
270 lwp_attach_brand_hdlrs(klwp_t *lwp)
271 {}
272 
273 /*ARGSUSED*/
274 void
275 lwp_detach_brand_hdlrs(klwp_t *lwp)
276 {}
277 
278 /*
279  * fill in the extra register state area specified with the
280  * specified lwp's platform-dependent non-floating-point extra
281  * register state information
282  */
283 /* ARGSUSED */
284 void
285 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
286 {
287 	/* for sun4u nothing to do here, added for symmetry */
288 }
289 
290 /*
291  * fill in the extra register state area specified with the specified lwp's
292  * platform-dependent floating-point extra register state information.
293  * NOTE:  'lwp' might not correspond to 'curthread' since this is
294  * called from code in /proc to get the registers of another lwp.
295  */
296 void
297 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
298 {
299 	prxregset_t *xregs = (prxregset_t *)xrp;
300 	kfpu_t *fp = lwptofpu(lwp);
301 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
302 	uint64_t gsr;
303 
304 	/*
305 	 * fp_fksave() does not flush the GSR register into
306 	 * the lwp area, so do it now
307 	 */
308 	kpreempt_disable();
309 	if (ttolwp(curthread) == lwp && fpu_exists) {
310 		fp->fpu_fprs = _fp_read_fprs();
311 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
312 			_fp_write_fprs(fprs);
313 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
314 		}
315 		save_gsr(fp);
316 	}
317 	gsr = get_gsr(fp);
318 	kpreempt_enable();
319 	PRXREG_GSR(xregs) = gsr;
320 }
321 
322 /*
323  * set the specified lwp's platform-dependent non-floating-point
324  * extra register state based on the specified input
325  */
326 /* ARGSUSED */
327 void
328 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
329 {
330 	/* for sun4u nothing to do here, added for symmetry */
331 }
332 
333 /*
334  * set the specified lwp's platform-dependent floating-point
335  * extra register state based on the specified input
336  */
337 void
338 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
339 {
340 	prxregset_t *xregs = (prxregset_t *)xrp;
341 	kfpu_t *fp = lwptofpu(lwp);
342 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
343 	uint64_t gsr = PRXREG_GSR(xregs);
344 
345 	kpreempt_disable();
346 	set_gsr(gsr, lwptofpu(lwp));
347 
348 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
349 		fp->fpu_fprs = _fp_read_fprs();
350 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
351 			_fp_write_fprs(fprs);
352 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
353 		}
354 		restore_gsr(lwptofpu(lwp));
355 	}
356 	kpreempt_enable();
357 }
358 
359 /*
360  * fill in the sun4u asrs, ie, the lwp's platform-dependent
361  * non-floating-point extra register state information
362  */
363 /* ARGSUSED */
364 void
365 getasrs(klwp_t *lwp, asrset_t asr)
366 {
367 	/* for sun4u nothing to do here, added for symmetry */
368 }
369 
370 /*
371  * fill in the sun4u asrs, ie, the lwp's platform-dependent
372  * floating-point extra register state information
373  */
374 void
375 getfpasrs(klwp_t *lwp, asrset_t asr)
376 {
377 	kfpu_t *fp = lwptofpu(lwp);
378 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
379 
380 	kpreempt_disable();
381 	if (ttolwp(curthread) == lwp)
382 		fp->fpu_fprs = _fp_read_fprs();
383 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
384 		if (fpu_exists && ttolwp(curthread) == lwp) {
385 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
386 				_fp_write_fprs(fprs);
387 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
388 			}
389 			save_gsr(fp);
390 		}
391 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
392 	}
393 	kpreempt_enable();
394 }
395 
396 /*
397  * set the sun4u asrs, ie, the lwp's platform-dependent
398  * non-floating-point extra register state information
399  */
400 /* ARGSUSED */
401 void
402 setasrs(klwp_t *lwp, asrset_t asr)
403 {
404 	/* for sun4u nothing to do here, added for symmetry */
405 }
406 
407 void
408 setfpasrs(klwp_t *lwp, asrset_t asr)
409 {
410 	kfpu_t *fp = lwptofpu(lwp);
411 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
412 
413 	kpreempt_disable();
414 	if (ttolwp(curthread) == lwp)
415 		fp->fpu_fprs = _fp_read_fprs();
416 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
417 		set_gsr(asr[ASR_GSR], fp);
418 		if (fpu_exists && ttolwp(curthread) == lwp) {
419 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
420 				_fp_write_fprs(fprs);
421 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
422 			}
423 			restore_gsr(fp);
424 		}
425 	}
426 	kpreempt_enable();
427 }
428 
429 /*
430  * Create interrupt kstats for this CPU.
431  */
432 void
433 cpu_create_intrstat(cpu_t *cp)
434 {
435 	int		i;
436 	kstat_t		*intr_ksp;
437 	kstat_named_t	*knp;
438 	char		name[KSTAT_STRLEN];
439 	zoneid_t	zoneid;
440 
441 	ASSERT(MUTEX_HELD(&cpu_lock));
442 
443 	if (pool_pset_enabled())
444 		zoneid = GLOBAL_ZONEID;
445 	else
446 		zoneid = ALL_ZONES;
447 
448 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
449 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
450 
451 	/*
452 	 * Initialize each PIL's named kstat
453 	 */
454 	if (intr_ksp != NULL) {
455 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
456 		knp = (kstat_named_t *)intr_ksp->ks_data;
457 		intr_ksp->ks_private = cp;
458 		for (i = 0; i < PIL_MAX; i++) {
459 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
460 			    i + 1);
461 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
462 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
463 			    i + 1);
464 			kstat_named_init(&knp[(i * 2) + 1], name,
465 			    KSTAT_DATA_UINT64);
466 		}
467 		kstat_install(intr_ksp);
468 	}
469 }
470 
471 /*
472  * Delete interrupt kstats for this CPU.
473  */
474 void
475 cpu_delete_intrstat(cpu_t *cp)
476 {
477 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
478 }
479 
480 /*
481  * Convert interrupt statistics from CPU ticks to nanoseconds and
482  * update kstat.
483  */
484 int
485 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
486 {
487 	kstat_named_t	*knp = ksp->ks_data;
488 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
489 	int		i;
490 
491 	if (rw == KSTAT_WRITE)
492 		return (EACCES);
493 
494 	/*
495 	 * We use separate passes to copy and convert the statistics to
496 	 * nanoseconds. This assures that the snapshot of the data is as
497 	 * self-consistent as possible.
498 	 */
499 
500 	for (i = 0; i < PIL_MAX; i++) {
501 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
502 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
503 	}
504 
505 	for (i = 0; i < PIL_MAX; i++) {
506 		knp[i * 2].value.ui64 =
507 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
508 		    cpup->cpu_id);
509 	}
510 
511 	return (0);
512 }
513 
514 /*
515  * Called by common/os/cpu.c for psrinfo(1m) kstats
516  */
517 char *
518 cpu_fru_fmri(cpu_t *cp)
519 {
520 	return (cpunodes[cp->cpu_id].fru_fmri);
521 }
522 
523 /*
524  * An interrupt thread is ending a time slice, so compute the interval it
525  * ran for and update the statistic for its PIL.
526  */
527 void
528 cpu_intr_swtch_enter(kthread_id_t t)
529 {
530 	uint64_t	interval;
531 	uint64_t	start;
532 	cpu_t		*cpu;
533 
534 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
535 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
536 
537 	/*
538 	 * We could be here with a zero timestamp. This could happen if:
539 	 * an interrupt thread which no longer has a pinned thread underneath
540 	 * it (i.e. it blocked at some point in its past) has finished running
541 	 * its handler. intr_thread() updated the interrupt statistic for its
542 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
543 	 * return to, swtch() gets called and we end up here.
544 	 *
545 	 * It can also happen if an interrupt thread in intr_thread() calls
546 	 * preempt. It will have already taken care of updating stats. In
547 	 * this event, the interrupt thread will be runnable.
548 	 */
549 	if (t->t_intr_start) {
550 		do {
551 			start = t->t_intr_start;
552 			interval = gettick_counter() - start;
553 		} while (cas64(&t->t_intr_start, start, 0) != start);
554 		cpu = CPU;
555 		if (cpu->cpu_m.divisor > 1)
556 			interval *= cpu->cpu_m.divisor;
557 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
558 
559 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
560 		    interval);
561 	} else
562 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
563 }
564 
565 
566 /*
567  * An interrupt thread is returning from swtch(). Place a starting timestamp
568  * in its thread structure.
569  */
570 void
571 cpu_intr_swtch_exit(kthread_id_t t)
572 {
573 	uint64_t ts;
574 
575 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
576 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
577 
578 	do {
579 		ts = t->t_intr_start;
580 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
581 }
582 
583 
584 int
585 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
586 {
587 	if (&plat_blacklist)
588 		return (plat_blacklist(cmd, scheme, fmri, class));
589 
590 	return (ENOTSUP);
591 }
592 
593 int
594 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
595 {
596 	extern void kdi_flush_caches(void);
597 	size_t nread = 0;
598 	uint32_t word;
599 	int slop, i;
600 
601 	kdi_flush_caches();
602 	membar_enter();
603 
604 	/* We might not begin on a word boundary. */
605 	if ((slop = addr & 3) != 0) {
606 		word = ldphys(addr & ~3);
607 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
608 			*buf++ = ((uchar_t *)&word)[i];
609 		addr = roundup(addr, 4);
610 	}
611 
612 	while (nbytes > 0) {
613 		word = ldphys(addr);
614 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
615 			*buf++ = ((uchar_t *)&word)[i];
616 	}
617 
618 	kdi_flush_caches();
619 
620 	*ncopiedp = nread;
621 	return (0);
622 }
623 
624 int
625 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
626 {
627 	extern void kdi_flush_caches(void);
628 	size_t nwritten = 0;
629 	uint32_t word;
630 	int slop, i;
631 
632 	kdi_flush_caches();
633 
634 	/* We might not begin on a word boundary. */
635 	if ((slop = addr & 3) != 0) {
636 		word = ldphys(addr & ~3);
637 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
638 			((uchar_t *)&word)[i] = *buf++;
639 		stphys(addr & ~3, word);
640 		addr = roundup(addr, 4);
641 	}
642 
643 	while (nbytes > 3) {
644 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
645 			((uchar_t *)&word)[i] = *buf++;
646 		stphys(addr, word);
647 		addr += 4;
648 	}
649 
650 	/* We might not end with a whole word. */
651 	if (nbytes > 0) {
652 		word = ldphys(addr);
653 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
654 			((uchar_t *)&word)[i] = *buf++;
655 		stphys(addr, word);
656 	}
657 
658 	membar_enter();
659 	kdi_flush_caches();
660 
661 	*ncopiedp = nwritten;
662 	return (0);
663 }
664 
665 static void
666 kdi_kernpanic(struct regs *regs, uint_t tt)
667 {
668 	sync_reg_buf = *regs;
669 	sync_tt = tt;
670 
671 	sync_handler();
672 }
673 
674 static void
675 kdi_plat_call(void (*platfn)(void))
676 {
677 	if (platfn != NULL) {
678 		prom_suspend_prepost();
679 		platfn();
680 		prom_resume_prepost();
681 	}
682 }
683 
684 void
685 mach_kdi_init(kdi_t *kdi)
686 {
687 	kdi->kdi_plat_call = kdi_plat_call;
688 	kdi->kdi_kmdb_enter = kmdb_enter;
689 	kdi->mkdi_cpu_index = kdi_cpu_index;
690 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
691 	kdi->mkdi_kernpanic = kdi_kernpanic;
692 }
693 
694 
695 /*
696  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
697  * long, and it fills in the array with the time spent on cpu in
698  * each of the mstates, where time is returned in nsec.
699  *
700  * No guarantee is made that the returned values in times[] will
701  * monotonically increase on sequential calls, although this will
702  * be true in the long run. Any such guarantee must be handled by
703  * the caller, if needed. This can happen if we fail to account
704  * for elapsed time due to a generation counter conflict, yet we
705  * did account for it on a prior call (see below).
706  *
707  * The complication is that the cpu in question may be updating
708  * its microstate at the same time that we are reading it.
709  * Because the microstate is only updated when the CPU's state
710  * changes, the values in cpu_intracct[] can be indefinitely out
711  * of date. To determine true current values, it is necessary to
712  * compare the current time with cpu_mstate_start, and add the
713  * difference to times[cpu_mstate].
714  *
715  * This can be a problem if those values are changing out from
716  * under us. Because the code path in new_cpu_mstate() is
717  * performance critical, we have not added a lock to it. Instead,
718  * we have added a generation counter. Before beginning
719  * modifications, the counter is set to 0. After modifications,
720  * it is set to the old value plus one.
721  *
722  * get_cpu_mstate() will not consider the values of cpu_mstate
723  * and cpu_mstate_start to be usable unless the value of
724  * cpu_mstate_gen is both non-zero and unchanged, both before and
725  * after reading the mstate information. Note that we must
726  * protect against out-of-order loads around accesses to the
727  * generation counter. Also, this is a best effort approach in
728  * that we do not retry should the counter be found to have
729  * changed.
730  *
731  * cpu_intracct[] is used to identify time spent in each CPU
732  * mstate while handling interrupts. Such time should be reported
733  * against system time, and so is subtracted out from its
734  * corresponding cpu_acct[] time and added to
735  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
736  * %ticks, but acct time may be stored as %sticks, thus requiring
737  * different conversions before they can be compared.
738  */
739 
740 void
741 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
742 {
743 	int i;
744 	hrtime_t now, start;
745 	uint16_t gen;
746 	uint16_t state;
747 	hrtime_t intracct[NCMSTATES];
748 
749 	/*
750 	 * Load all volatile state under the protection of membar.
751 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
752 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
753 	 * arrives after we make our last check of cpu_mstate_gen.
754 	 */
755 
756 	now = gethrtime_unscaled();
757 	gen = cpu->cpu_mstate_gen;
758 
759 	membar_consumer();	/* guarantee load ordering */
760 	start = cpu->cpu_mstate_start;
761 	state = cpu->cpu_mstate;
762 	for (i = 0; i < NCMSTATES; i++) {
763 		intracct[i] = cpu->cpu_intracct[i];
764 		times[i] = cpu->cpu_acct[i];
765 	}
766 	membar_consumer();	/* guarantee load ordering */
767 
768 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
769 		times[state] += now - start;
770 
771 	for (i = 0; i < NCMSTATES; i++) {
772 		scalehrtime(&times[i]);
773 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
774 	}
775 
776 	for (i = 0; i < NCMSTATES; i++) {
777 		if (i == CMS_SYSTEM)
778 			continue;
779 		times[i] -= intracct[i];
780 		if (times[i] < 0) {
781 			intracct[i] += times[i];
782 			times[i] = 0;
783 		}
784 		times[CMS_SYSTEM] += intracct[i];
785 	}
786 }
787 
788 void
789 mach_cpu_pause(volatile char *safe)
790 {
791 	/*
792 	 * This cpu is now safe.
793 	 */
794 	*safe = PAUSE_WAIT;
795 	membar_enter(); /* make sure stores are flushed */
796 
797 	/*
798 	 * Now we wait.  When we are allowed to continue, safe
799 	 * will be set to PAUSE_IDLE.
800 	 */
801 	while (*safe != PAUSE_IDLE)
802 		SMT_PAUSE();
803 }
804 
805 /*ARGSUSED*/
806 int
807 plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
808 {
809 	return (ENOTSUP);
810 }
811 
812 int
813 dump_plat_addr()
814 {
815 	return (0);
816 }
817 
818 void
819 dump_plat_pfn()
820 {
821 }
822 
823 /* ARGSUSED */
824 int
825 dump_plat_data(void *dump_cdata)
826 {
827 	return (0);
828 }
829 
830 /* ARGSUSED */
831 int
832 plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
833 {
834 	return (PLAT_HOLD_OK);
835 }
836 
837 /* ARGSUSED */
838 void
839 plat_release_page(page_t *pp)
840 {
841 }
842