xref: /titanic_41/usr/src/uts/sun4/os/machdep.c (revision b72d5b75fd6f5bb08d29f65652d60058fc3a2608)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kstat.h>
28 #include <sys/param.h>
29 #include <sys/stack.h>
30 #include <sys/regset.h>
31 #include <sys/thread.h>
32 #include <sys/proc.h>
33 #include <sys/procfs_isa.h>
34 #include <sys/kmem.h>
35 #include <sys/cpuvar.h>
36 #include <sys/systm.h>
37 #include <sys/machpcb.h>
38 #include <sys/machasi.h>
39 #include <sys/vis.h>
40 #include <sys/fpu/fpusystm.h>
41 #include <sys/cpu_module.h>
42 #include <sys/privregs.h>
43 #include <sys/archsystm.h>
44 #include <sys/atomic.h>
45 #include <sys/cmn_err.h>
46 #include <sys/time.h>
47 #include <sys/clock.h>
48 #include <sys/cmp.h>
49 #include <sys/platform_module.h>
50 #include <sys/bl.h>
51 #include <sys/nvpair.h>
52 #include <sys/kdi_impl.h>
53 #include <sys/machsystm.h>
54 #include <sys/sysmacros.h>
55 #include <sys/promif.h>
56 #include <sys/pool_pset.h>
57 #include <sys/mem.h>
58 #include <sys/dumphdr.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/hold_page.h>
61 #include <sys/cpu.h>
62 
/*
 * Both values are initialized to sixteen MMU pages.
 * NOTE(review): presumably the maximum physical I/O transfer size and the
 * I/O clustering size; confirm against the consumers of these variables.
 */
int maxphys = MMU_PAGESIZE * 16;	/* 128k */
int klustsize = MMU_PAGESIZE * 16;	/* 128k */
65 
66 /*
67  * Initialize kernel thread's stack.
68  */
69 caddr_t
70 thread_stk_init(caddr_t stk)
71 {
72 	kfpu_t *fp;
73 	ulong_t align;
74 
75 	/* allocate extra space for floating point state */
76 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
77 	align = (uintptr_t)stk & 0x3f;
78 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
79 	fp = (kfpu_t *)stk;
80 	fp->fpu_fprs = 0;
81 
82 	stk -= SA(MINFRAME);
83 	return (stk);
84 }
85 
/* Size in bytes of MAXWIN register windows for each window layout. */
#define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
#define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))

/* kmem caches for window buffers; created by lwp_stk_cache_init(). */
kmem_cache_t	*wbuf32_cache;
kmem_cache_t	*wbuf64_cache;
91 
92 void
93 lwp_stk_cache_init(void)
94 {
95 	/*
96 	 * Window buffers are allocated from the static arena
97 	 * because they are accessed at TL>0. We also must use
98 	 * KMC_NOHASH to prevent them from straddling page
99 	 * boundaries as they are accessed by physical address.
100 	 */
101 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
102 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
103 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
104 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
105 }
106 
107 /*
108  * Initialize lwp's kernel stack.
109  * Note that now that the floating point register save area (kfpu_t)
110  * has been broken out from machpcb and aligned on a 64 byte boundary so that
111  * we can do block load/stores to/from it, there are a couple of potential
112  * optimizations to save stack space. 1. The floating point register save
113  * area could be aligned on a 16 byte boundary, and the floating point code
114  * changed to (a) check the alignment and (b) use different save/restore
115  * macros depending upon the alignment. 2. The lwp_stk_init code below
116  * could be changed to calculate if less space would be wasted if machpcb
117  * was first instead of second. However there is a REGOFF macro used in
118  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
119  * register area is a fixed distance from the %sp, and would have to be
120  * changed to a pointer or something...JJ said later.
121  */
122 caddr_t
123 lwp_stk_init(klwp_t *lwp, caddr_t stk)
124 {
125 	struct machpcb *mpcb;
126 	kfpu_t *fp;
127 	uintptr_t aln;
128 
129 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
130 	aln = (uintptr_t)stk & 0x3F;
131 	stk -= aln;
132 	fp = (kfpu_t *)stk;
133 	stk -= SA(sizeof (struct machpcb));
134 	mpcb = (struct machpcb *)stk;
135 	bzero(mpcb, sizeof (struct machpcb));
136 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
137 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
138 	lwp->lwp_fpu = (void *)fp;
139 	mpcb->mpcb_fpu = fp;
140 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
141 	mpcb->mpcb_thread = lwp->lwp_thread;
142 	mpcb->mpcb_wbcnt = 0;
143 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
144 		mpcb->mpcb_wstate = WSTATE_USER32;
145 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
146 	} else {
147 		mpcb->mpcb_wstate = WSTATE_USER64;
148 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
149 	}
150 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
151 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
152 	mpcb->mpcb_pa = va_to_pa(mpcb);
153 	return (stk);
154 }
155 
156 void
157 lwp_stk_fini(klwp_t *lwp)
158 {
159 	struct machpcb *mpcb = lwptompcb(lwp);
160 
161 	/*
162 	 * there might be windows still in the wbuf due to unmapped
163 	 * stack, misaligned stack pointer, etc.  We just free it.
164 	 */
165 	mpcb->mpcb_wbcnt = 0;
166 	if (mpcb->mpcb_wstate == WSTATE_USER32)
167 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
168 	else
169 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
170 	mpcb->mpcb_wbuf = NULL;
171 	mpcb->mpcb_wbuf_pa = -1;
172 }
173 
174 
175 /*
176  * Copy regs from parent to child.
177  */
178 void
179 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
180 {
181 	kthread_t *t, *pt = lwptot(lwp);
182 	struct machpcb *mpcb = lwptompcb(clwp);
183 	struct machpcb *pmpcb = lwptompcb(lwp);
184 	kfpu_t *fp, *pfp = lwptofpu(lwp);
185 	caddr_t wbuf;
186 	uint_t wstate;
187 
188 	t = mpcb->mpcb_thread;
189 	/*
190 	 * remember child's fp and wbuf since they will get erased during
191 	 * the bcopy.
192 	 */
193 	fp = mpcb->mpcb_fpu;
194 	wbuf = mpcb->mpcb_wbuf;
195 	wstate = mpcb->mpcb_wstate;
196 	/*
197 	 * Don't copy mpcb_frame since we hand-crafted it
198 	 * in thread_load().
199 	 */
200 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
201 	mpcb->mpcb_thread = t;
202 	mpcb->mpcb_fpu = fp;
203 	fp->fpu_q = mpcb->mpcb_fpu_q;
204 
205 	/*
206 	 * It is theoretically possibly for the lwp's wstate to
207 	 * be different from its value assigned in lwp_stk_init,
208 	 * since lwp_stk_init assumed the data model of the process.
209 	 * Here, we took on the data model of the cloned lwp.
210 	 */
211 	if (mpcb->mpcb_wstate != wstate) {
212 		if (wstate == WSTATE_USER32) {
213 			kmem_cache_free(wbuf32_cache, wbuf);
214 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
215 			wstate = WSTATE_USER64;
216 		} else {
217 			kmem_cache_free(wbuf64_cache, wbuf);
218 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
219 			wstate = WSTATE_USER32;
220 		}
221 	}
222 
223 	mpcb->mpcb_pa = va_to_pa(mpcb);
224 	mpcb->mpcb_wbuf = wbuf;
225 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
226 
227 	ASSERT(mpcb->mpcb_wstate == wstate);
228 
229 	if (mpcb->mpcb_wbcnt != 0) {
230 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
231 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
232 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
233 	}
234 
235 	if (pt == curthread)
236 		pfp->fpu_fprs = _fp_read_fprs();
237 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
238 		if (pt == curthread && fpu_exists) {
239 			save_gsr(clwp->lwp_fpu);
240 		} else {
241 			uint64_t gsr;
242 			gsr = get_gsr(lwp->lwp_fpu);
243 			set_gsr(gsr, clwp->lwp_fpu);
244 		}
245 		fp_fork(lwp, clwp);
246 	}
247 }
248 
249 /*
250  * Free lwp fpu regs.
251  */
252 void
253 lwp_freeregs(klwp_t *lwp, int isexec)
254 {
255 	kfpu_t *fp = lwptofpu(lwp);
256 
257 	if (lwptot(lwp) == curthread)
258 		fp->fpu_fprs = _fp_read_fprs();
259 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
260 		fp_free(fp, isexec);
261 }
262 
/*
 * These functions are currently unused on sparc.
 *
 * lwp_attach_brand_hdlrs() is an intentionally empty stub; `lwp' is ignored.
 */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}
270 
/*
 * Intentionally empty stub; `lwp' is ignored.
 */
/*ARGSUSED*/
void
lwp_detach_brand_hdlrs(klwp_t *lwp)
{}
275 
/*
 * Fill in the extra register state area specified with the
 * specified lwp's platform-dependent non-floating-point extra
 * register state information.
 *
 * This implementation is empty: neither `lwp' nor `xrp' is touched.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
287 
288 /*
289  * fill in the extra register state area specified with the specified lwp's
290  * platform-dependent floating-point extra register state information.
291  * NOTE:  'lwp' might not correspond to 'curthread' since this is
292  * called from code in /proc to get the registers of another lwp.
293  */
294 void
295 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
296 {
297 	prxregset_t *xregs = (prxregset_t *)xrp;
298 	kfpu_t *fp = lwptofpu(lwp);
299 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
300 	uint64_t gsr;
301 
302 	/*
303 	 * fp_fksave() does not flush the GSR register into
304 	 * the lwp area, so do it now
305 	 */
306 	kpreempt_disable();
307 	if (ttolwp(curthread) == lwp && fpu_exists) {
308 		fp->fpu_fprs = _fp_read_fprs();
309 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
310 			_fp_write_fprs(fprs);
311 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
312 		}
313 		save_gsr(fp);
314 	}
315 	gsr = get_gsr(fp);
316 	kpreempt_enable();
317 	PRXREG_GSR(xregs) = gsr;
318 }
319 
/*
 * Set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input.
 *
 * This implementation is empty: neither `lwp' nor `xrp' is touched.
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
330 
331 /*
332  * set the specified lwp's platform-dependent floating-point
333  * extra register state based on the specified input
334  */
335 void
336 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
337 {
338 	prxregset_t *xregs = (prxregset_t *)xrp;
339 	kfpu_t *fp = lwptofpu(lwp);
340 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
341 	uint64_t gsr = PRXREG_GSR(xregs);
342 
343 	kpreempt_disable();
344 	set_gsr(gsr, lwptofpu(lwp));
345 
346 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
347 		fp->fpu_fprs = _fp_read_fprs();
348 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
349 			_fp_write_fprs(fprs);
350 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
351 		}
352 		restore_gsr(lwptofpu(lwp));
353 	}
354 	kpreempt_enable();
355 }
356 
/*
 * Fill in the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information.
 *
 * This implementation is empty: `asr' is left unmodified.
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
367 
368 /*
369  * fill in the sun4u asrs, ie, the lwp's platform-dependent
370  * floating-point extra register state information
371  */
372 void
373 getfpasrs(klwp_t *lwp, asrset_t asr)
374 {
375 	kfpu_t *fp = lwptofpu(lwp);
376 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
377 
378 	kpreempt_disable();
379 	if (ttolwp(curthread) == lwp)
380 		fp->fpu_fprs = _fp_read_fprs();
381 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
382 		if (fpu_exists && ttolwp(curthread) == lwp) {
383 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
384 				_fp_write_fprs(fprs);
385 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
386 			}
387 			save_gsr(fp);
388 		}
389 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
390 	}
391 	kpreempt_enable();
392 }
393 
/*
 * Set the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information.
 *
 * This implementation is empty: `asr' is not read.
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
404 
405 void
406 setfpasrs(klwp_t *lwp, asrset_t asr)
407 {
408 	kfpu_t *fp = lwptofpu(lwp);
409 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
410 
411 	kpreempt_disable();
412 	if (ttolwp(curthread) == lwp)
413 		fp->fpu_fprs = _fp_read_fprs();
414 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
415 		set_gsr(asr[ASR_GSR], fp);
416 		if (fpu_exists && ttolwp(curthread) == lwp) {
417 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
418 				_fp_write_fprs(fprs);
419 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
420 			}
421 			restore_gsr(fp);
422 		}
423 	}
424 	kpreempt_enable();
425 }
426 
427 /*
428  * Create interrupt kstats for this CPU.
429  */
430 void
431 cpu_create_intrstat(cpu_t *cp)
432 {
433 	int		i;
434 	kstat_t		*intr_ksp;
435 	kstat_named_t	*knp;
436 	char		name[KSTAT_STRLEN];
437 	zoneid_t	zoneid;
438 
439 	ASSERT(MUTEX_HELD(&cpu_lock));
440 
441 	if (pool_pset_enabled())
442 		zoneid = GLOBAL_ZONEID;
443 	else
444 		zoneid = ALL_ZONES;
445 
446 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
447 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
448 
449 	/*
450 	 * Initialize each PIL's named kstat
451 	 */
452 	if (intr_ksp != NULL) {
453 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
454 		knp = (kstat_named_t *)intr_ksp->ks_data;
455 		intr_ksp->ks_private = cp;
456 		for (i = 0; i < PIL_MAX; i++) {
457 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
458 			    i + 1);
459 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
460 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
461 			    i + 1);
462 			kstat_named_init(&knp[(i * 2) + 1], name,
463 			    KSTAT_DATA_UINT64);
464 		}
465 		kstat_install(intr_ksp);
466 	}
467 }
468 
/*
 * Delete interrupt kstats for this CPU.
 *
 * Removes the "cpu:<cpu_id>:intrstat" kstat by name.
 * NOTE(review): cpu_create_intrstat() creates the kstat with either
 * GLOBAL_ZONEID or ALL_ZONES; confirm that a lookup with ALL_ZONES finds
 * it in both cases.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}
477 
478 /*
479  * Convert interrupt statistics from CPU ticks to nanoseconds and
480  * update kstat.
481  */
482 int
483 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
484 {
485 	kstat_named_t	*knp = ksp->ks_data;
486 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
487 	int		i;
488 
489 	if (rw == KSTAT_WRITE)
490 		return (EACCES);
491 
492 	/*
493 	 * We use separate passes to copy and convert the statistics to
494 	 * nanoseconds. This assures that the snapshot of the data is as
495 	 * self-consistent as possible.
496 	 */
497 
498 	for (i = 0; i < PIL_MAX; i++) {
499 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
500 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
501 	}
502 
503 	for (i = 0; i < PIL_MAX; i++) {
504 		knp[i * 2].value.ui64 =
505 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
506 		    cpup->cpu_id);
507 	}
508 
509 	return (0);
510 }
511 
/*
 * Called by common/os/cpu.c for psrinfo(1m) kstats
 *
 * Returns the fru_fmri member of the cpunodes[] entry indexed by the
 * CPU's cpu_id.
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}
520 
521 /*
522  * An interrupt thread is ending a time slice, so compute the interval it
523  * ran for and update the statistic for its PIL.
524  */
525 void
526 cpu_intr_swtch_enter(kthread_id_t t)
527 {
528 	uint64_t	interval;
529 	uint64_t	start;
530 	cpu_t		*cpu;
531 
532 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
533 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
534 
535 	/*
536 	 * We could be here with a zero timestamp. This could happen if:
537 	 * an interrupt thread which no longer has a pinned thread underneath
538 	 * it (i.e. it blocked at some point in its past) has finished running
539 	 * its handler. intr_thread() updated the interrupt statistic for its
540 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
541 	 * return to, swtch() gets called and we end up here.
542 	 *
543 	 * It can also happen if an interrupt thread in intr_thread() calls
544 	 * preempt. It will have already taken care of updating stats. In
545 	 * this event, the interrupt thread will be runnable.
546 	 */
547 	if (t->t_intr_start) {
548 		do {
549 			start = t->t_intr_start;
550 			interval = gettick_counter() - start;
551 		} while (cas64(&t->t_intr_start, start, 0) != start);
552 		cpu = CPU;
553 		if (cpu->cpu_m.divisor > 1)
554 			interval *= cpu->cpu_m.divisor;
555 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
556 
557 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
558 		    interval);
559 	} else
560 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
561 }
562 
563 
564 /*
565  * An interrupt thread is returning from swtch(). Place a starting timestamp
566  * in its thread structure.
567  */
568 void
569 cpu_intr_swtch_exit(kthread_id_t t)
570 {
571 	uint64_t ts;
572 
573 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
574 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
575 
576 	do {
577 		ts = t->t_intr_start;
578 	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
579 }
580 
581 
582 int
583 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
584 {
585 	if (&plat_blacklist)
586 		return (plat_blacklist(cmd, scheme, fmri, class));
587 
588 	return (ENOTSUP);
589 }
590 
591 int
592 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
593 {
594 	extern void kdi_flush_caches(void);
595 	size_t nread = 0;
596 	uint32_t word;
597 	int slop, i;
598 
599 	kdi_flush_caches();
600 	membar_enter();
601 
602 	/* We might not begin on a word boundary. */
603 	if ((slop = addr & 3) != 0) {
604 		word = ldphys(addr & ~3);
605 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
606 			*buf++ = ((uchar_t *)&word)[i];
607 		addr = roundup(addr, 4);
608 	}
609 
610 	while (nbytes > 0) {
611 		word = ldphys(addr);
612 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
613 			*buf++ = ((uchar_t *)&word)[i];
614 	}
615 
616 	kdi_flush_caches();
617 
618 	*ncopiedp = nread;
619 	return (0);
620 }
621 
622 int
623 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
624 {
625 	extern void kdi_flush_caches(void);
626 	size_t nwritten = 0;
627 	uint32_t word;
628 	int slop, i;
629 
630 	kdi_flush_caches();
631 
632 	/* We might not begin on a word boundary. */
633 	if ((slop = addr & 3) != 0) {
634 		word = ldphys(addr & ~3);
635 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
636 			((uchar_t *)&word)[i] = *buf++;
637 		stphys(addr & ~3, word);
638 		addr = roundup(addr, 4);
639 	}
640 
641 	while (nbytes > 3) {
642 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
643 			((uchar_t *)&word)[i] = *buf++;
644 		stphys(addr, word);
645 		addr += 4;
646 	}
647 
648 	/* We might not end with a whole word. */
649 	if (nbytes > 0) {
650 		word = ldphys(addr);
651 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
652 			((uchar_t *)&word)[i] = *buf++;
653 		stphys(addr, word);
654 	}
655 
656 	membar_enter();
657 	kdi_flush_caches();
658 
659 	*ncopiedp = nwritten;
660 	return (0);
661 }
662 
/*
 * Copy the supplied register set and trap type into sync_reg_buf and
 * sync_tt, then call sync_handler().  Installed as the mkdi_kernpanic
 * entry by mach_kdi_init().
 */
static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}
671 
/*
 * Call the platform function `platfn', bracketed by
 * prom_suspend_prepost() and prom_resume_prepost().  A NULL `platfn' is
 * ignored and neither bracket routine is called.
 */
static void
kdi_plat_call(void (*platfn)(void))
{
	if (platfn == NULL)
		return;

	prom_suspend_prepost();
	platfn();
	prom_resume_prepost();
}
681 
/*
 * Fill in the machine-dependent entries of the kdi_t handed in by the
 * caller with the routines provided for this platform.
 */
void
mach_kdi_init(kdi_t *kdi)
{
	kdi->kdi_plat_call = kdi_plat_call;
	kdi->kdi_kmdb_enter = kmdb_enter;
	kdi->mkdi_cpu_index = kdi_cpu_index;
	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
	kdi->mkdi_kernpanic = kdi_kernpanic;
}
691 
692 
693 /*
694  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
695  * long, and it fills in the array with the time spent on cpu in
696  * each of the mstates, where time is returned in nsec.
697  *
698  * No guarantee is made that the returned values in times[] will
699  * monotonically increase on sequential calls, although this will
700  * be true in the long run. Any such guarantee must be handled by
701  * the caller, if needed. This can happen if we fail to account
702  * for elapsed time due to a generation counter conflict, yet we
703  * did account for it on a prior call (see below).
704  *
705  * The complication is that the cpu in question may be updating
706  * its microstate at the same time that we are reading it.
707  * Because the microstate is only updated when the CPU's state
 * changes, the values in cpu_acct[] can be indefinitely out
709  * of date. To determine true current values, it is necessary to
710  * compare the current time with cpu_mstate_start, and add the
711  * difference to times[cpu_mstate].
712  *
713  * This can be a problem if those values are changing out from
714  * under us. Because the code path in new_cpu_mstate() is
715  * performance critical, we have not added a lock to it. Instead,
716  * we have added a generation counter. Before beginning
717  * modifications, the counter is set to 0. After modifications,
718  * it is set to the old value plus one.
719  *
720  * get_cpu_mstate() will not consider the values of cpu_mstate
721  * and cpu_mstate_start to be usable unless the value of
722  * cpu_mstate_gen is both non-zero and unchanged, both before and
723  * after reading the mstate information. Note that we must
724  * protect against out-of-order loads around accesses to the
725  * generation counter. Also, this is a best effort approach in
726  * that we do not retry should the counter be found to have
727  * changed.
728  *
729  * cpu_intracct[] is used to identify time spent in each CPU
730  * mstate while handling interrupts. Such time should be reported
731  * against system time, and so is subtracted out from its
732  * corresponding cpu_acct[] time and added to
733  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
734  * %ticks, but acct time may be stored as %sticks, thus requiring
735  * different conversions before they can be compared.
736  */
737 
738 void
739 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
740 {
741 	int i;
742 	hrtime_t now, start;
743 	uint16_t gen;
744 	uint16_t state;
745 	hrtime_t intracct[NCMSTATES];
746 
747 	/*
748 	 * Load all volatile state under the protection of membar.
749 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
750 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
751 	 * arrives after we make our last check of cpu_mstate_gen.
752 	 */
753 
754 	now = gethrtime_unscaled();
755 	gen = cpu->cpu_mstate_gen;
756 
757 	membar_consumer();	/* guarantee load ordering */
758 	start = cpu->cpu_mstate_start;
759 	state = cpu->cpu_mstate;
760 	for (i = 0; i < NCMSTATES; i++) {
761 		intracct[i] = cpu->cpu_intracct[i];
762 		times[i] = cpu->cpu_acct[i];
763 	}
764 	membar_consumer();	/* guarantee load ordering */
765 
766 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
767 		times[state] += now - start;
768 
769 	for (i = 0; i < NCMSTATES; i++) {
770 		scalehrtime(&times[i]);
771 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
772 	}
773 
774 	for (i = 0; i < NCMSTATES; i++) {
775 		if (i == CMS_SYSTEM)
776 			continue;
777 		times[i] -= intracct[i];
778 		if (times[i] < 0) {
779 			intracct[i] += times[i];
780 			times[i] = 0;
781 		}
782 		times[CMS_SYSTEM] += intracct[i];
783 	}
784 }
785 
786 void
787 mach_cpu_pause(volatile char *safe)
788 {
789 	/*
790 	 * This cpu is now safe.
791 	 */
792 	*safe = PAUSE_WAIT;
793 	membar_enter(); /* make sure stores are flushed */
794 
795 	/*
796 	 * Now we wait.  When we are allowed to continue, safe
797 	 * will be set to PAUSE_IDLE.
798 	 */
799 	while (*safe != PAUSE_IDLE)
800 		SMT_PAUSE();
801 }
802 
/*
 * Stub implementation: both arguments are ignored and ENOTSUP is always
 * returned.
 */
/*ARGSUSED*/
int
plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
{
	return (ENOTSUP);
}
809 
/*
 * Stub implementation: takes no arguments and always returns 0.
 *
 * Defined with a (void) parameter list so that the definition is a real
 * prototype and calls passing arguments are rejected by the compiler.
 */
int
dump_plat_addr(void)
{
	return (0);
}
815 
/*
 * Stub implementation: takes no arguments and does nothing.
 *
 * Defined with a (void) parameter list so that the definition is a real
 * prototype and calls passing arguments are rejected by the compiler.
 */
void
dump_plat_pfn(void)
{
}
820 
/*
 * Stub implementation: `dump_cdata' is ignored and 0 is always returned.
 */
/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
	return (0);
}
827 
/*
 * Stub implementation: all arguments are ignored, *pp_ret is not written,
 * and PLAT_HOLD_OK is always returned.
 */
/* ARGSUSED */
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
	return (PLAT_HOLD_OK);
}
834 
/*
 * Stub implementation: `pp' is ignored and nothing is done.
 */
/* ARGSUSED */
void
plat_release_page(page_t *pp)
{
}
840 
/*
 * Stub implementation: `li' is ignored and nothing is done.
 */
/* ARGSUSED */
void
progressbar_key_abort(ldi_ident_t li)
{
}
846