xref: /illumos-gate/usr/src/uts/sun4/os/machdep.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kstat.h>
28 #include <sys/param.h>
29 #include <sys/stack.h>
30 #include <sys/regset.h>
31 #include <sys/thread.h>
32 #include <sys/proc.h>
33 #include <sys/procfs_isa.h>
34 #include <sys/kmem.h>
35 #include <sys/cpuvar.h>
36 #include <sys/systm.h>
37 #include <sys/machpcb.h>
38 #include <sys/machasi.h>
39 #include <sys/vis.h>
40 #include <sys/fpu/fpusystm.h>
41 #include <sys/cpu_module.h>
42 #include <sys/privregs.h>
43 #include <sys/archsystm.h>
44 #include <sys/atomic.h>
45 #include <sys/cmn_err.h>
46 #include <sys/time.h>
47 #include <sys/clock.h>
48 #include <sys/cmp.h>
49 #include <sys/platform_module.h>
50 #include <sys/bl.h>
51 #include <sys/nvpair.h>
52 #include <sys/kdi_impl.h>
53 #include <sys/machsystm.h>
54 #include <sys/sysmacros.h>
55 #include <sys/promif.h>
56 #include <sys/pool_pset.h>
57 #include <sys/mem.h>
58 #include <sys/dumphdr.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/hold_page.h>
61 #include <sys/cpu.h>
62 #include <sys/ivintr.h>
63 #include <sys/clock_impl.h>
64 #include <sys/machclock.h>
65 
/*
 * Both tunables are initialized to 16 MMU pages; the "128k" annotations
 * hold when MMU_PAGESIZE is 8K.
 */
int maxphys = MMU_PAGESIZE * 16;	/* 128k */
int klustsize = MMU_PAGESIZE * 16;	/* 128k */
68 
69 /*
70  * Initialize kernel thread's stack.
71  */
72 caddr_t
73 thread_stk_init(caddr_t stk)
74 {
75 	kfpu_t *fp;
76 	ulong_t align;
77 
78 	/* allocate extra space for floating point state */
79 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
80 	align = (uintptr_t)stk & 0x3f;
81 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
82 	fp = (kfpu_t *)stk;
83 	fp->fpu_fprs = 0;
84 
85 	stk -= SA(MINFRAME);
86 	return (stk);
87 }
88 
/* Bytes needed to hold MAXWIN register windows of each flavour. */
#define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
#define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))

/* kmem caches for window buffers; created by lwp_stk_cache_init(). */
kmem_cache_t	*wbuf32_cache;
kmem_cache_t	*wbuf64_cache;
94 
95 void
96 lwp_stk_cache_init(void)
97 {
98 	/*
99 	 * Window buffers are allocated from the static arena
100 	 * because they are accessed at TL>0. We also must use
101 	 * KMC_NOHASH to prevent them from straddling page
102 	 * boundaries as they are accessed by physical address.
103 	 */
104 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
105 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
106 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
107 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
108 }
109 
110 /*
111  * Initialize lwp's kernel stack.
112  * Note that now that the floating point register save area (kfpu_t)
113  * has been broken out from machpcb and aligned on a 64 byte boundary so that
114  * we can do block load/stores to/from it, there are a couple of potential
115  * optimizations to save stack space. 1. The floating point register save
116  * area could be aligned on a 16 byte boundary, and the floating point code
117  * changed to (a) check the alignment and (b) use different save/restore
118  * macros depending upon the alignment. 2. The lwp_stk_init code below
119  * could be changed to calculate if less space would be wasted if machpcb
120  * was first instead of second. However there is a REGOFF macro used in
121  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
122  * register area is a fixed distance from the %sp, and would have to be
123  * changed to a pointer or something...JJ said later.
124  */
125 caddr_t
126 lwp_stk_init(klwp_t *lwp, caddr_t stk)
127 {
128 	struct machpcb *mpcb;
129 	kfpu_t *fp;
130 	uintptr_t aln;
131 
132 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
133 	aln = (uintptr_t)stk & 0x3F;
134 	stk -= aln;
135 	fp = (kfpu_t *)stk;
136 	stk -= SA(sizeof (struct machpcb));
137 	mpcb = (struct machpcb *)stk;
138 	bzero(mpcb, sizeof (struct machpcb));
139 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
140 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
141 	lwp->lwp_fpu = (void *)fp;
142 	mpcb->mpcb_fpu = fp;
143 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
144 	mpcb->mpcb_thread = lwp->lwp_thread;
145 	mpcb->mpcb_wbcnt = 0;
146 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
147 		mpcb->mpcb_wstate = WSTATE_USER32;
148 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
149 	} else {
150 		mpcb->mpcb_wstate = WSTATE_USER64;
151 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
152 	}
153 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
154 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
155 	mpcb->mpcb_pa = va_to_pa(mpcb);
156 	return (stk);
157 }
158 
159 void
160 lwp_stk_fini(klwp_t *lwp)
161 {
162 	struct machpcb *mpcb = lwptompcb(lwp);
163 
164 	/*
165 	 * there might be windows still in the wbuf due to unmapped
166 	 * stack, misaligned stack pointer, etc.  We just free it.
167 	 */
168 	mpcb->mpcb_wbcnt = 0;
169 	if (mpcb->mpcb_wstate == WSTATE_USER32)
170 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
171 	else
172 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
173 	mpcb->mpcb_wbuf = NULL;
174 	mpcb->mpcb_wbuf_pa = -1;
175 }
176 
/*
 * Intentionally empty here: no work is performed for 'lwp'.
 */
/*ARGSUSED*/
void
lwp_fp_init(klwp_t *lwp)
{
}
182 
183 /*
184  * Copy regs from parent to child.
185  */
186 void
187 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
188 {
189 	kthread_t *t, *pt = lwptot(lwp);
190 	struct machpcb *mpcb = lwptompcb(clwp);
191 	struct machpcb *pmpcb = lwptompcb(lwp);
192 	kfpu_t *fp, *pfp = lwptofpu(lwp);
193 	caddr_t wbuf;
194 	uint_t wstate;
195 
196 	t = mpcb->mpcb_thread;
197 	/*
198 	 * remember child's fp and wbuf since they will get erased during
199 	 * the bcopy.
200 	 */
201 	fp = mpcb->mpcb_fpu;
202 	wbuf = mpcb->mpcb_wbuf;
203 	wstate = mpcb->mpcb_wstate;
204 	/*
205 	 * Don't copy mpcb_frame since we hand-crafted it
206 	 * in thread_load().
207 	 */
208 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
209 	mpcb->mpcb_thread = t;
210 	mpcb->mpcb_fpu = fp;
211 	fp->fpu_q = mpcb->mpcb_fpu_q;
212 
213 	/*
214 	 * It is theoretically possibly for the lwp's wstate to
215 	 * be different from its value assigned in lwp_stk_init,
216 	 * since lwp_stk_init assumed the data model of the process.
217 	 * Here, we took on the data model of the cloned lwp.
218 	 */
219 	if (mpcb->mpcb_wstate != wstate) {
220 		if (wstate == WSTATE_USER32) {
221 			kmem_cache_free(wbuf32_cache, wbuf);
222 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
223 			wstate = WSTATE_USER64;
224 		} else {
225 			kmem_cache_free(wbuf64_cache, wbuf);
226 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
227 			wstate = WSTATE_USER32;
228 		}
229 	}
230 
231 	mpcb->mpcb_pa = va_to_pa(mpcb);
232 	mpcb->mpcb_wbuf = wbuf;
233 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
234 
235 	ASSERT(mpcb->mpcb_wstate == wstate);
236 
237 	if (mpcb->mpcb_wbcnt != 0) {
238 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
239 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
240 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
241 	}
242 
243 	if (pt == curthread)
244 		pfp->fpu_fprs = _fp_read_fprs();
245 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
246 		if (pt == curthread && fpu_exists) {
247 			save_gsr(clwp->lwp_fpu);
248 		} else {
249 			uint64_t gsr;
250 			gsr = get_gsr(lwp->lwp_fpu);
251 			set_gsr(gsr, clwp->lwp_fpu);
252 		}
253 		fp_fork(lwp, clwp);
254 	}
255 }
256 
257 /*
258  * Free lwp fpu regs.
259  */
260 void
261 lwp_freeregs(klwp_t *lwp, int isexec)
262 {
263 	kfpu_t *fp = lwptofpu(lwp);
264 
265 	if (lwptot(lwp) == curthread)
266 		fp->fpu_fprs = _fp_read_fprs();
267 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
268 		fp_free(fp, isexec);
269 }
270 
271 /*
 * These functions are currently unused on sparc.
273  */
/* Empty placeholder: 'lwp' is ignored and nothing is done. */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}
278 
/* Empty placeholder: 'lwp' is ignored and nothing is done. */
/*ARGSUSED*/
void
lwp_detach_brand_hdlrs(klwp_t *lwp)
{}
283 
/*
 * fill in the extra register state area specified with the
 * specified lwp's platform-dependent non-floating-point extra
 * register state information
 *
 * This implementation is empty: neither 'lwp' nor 'xrp' is touched.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
295 
296 /*
297  * fill in the extra register state area specified with the specified lwp's
298  * platform-dependent floating-point extra register state information.
299  * NOTE:  'lwp' might not correspond to 'curthread' since this is
300  * called from code in /proc to get the registers of another lwp.
301  */
302 void
303 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
304 {
305 	prxregset_t *xregs = (prxregset_t *)xrp;
306 	kfpu_t *fp = lwptofpu(lwp);
307 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
308 	uint64_t gsr;
309 
310 	/*
311 	 * fp_fksave() does not flush the GSR register into
312 	 * the lwp area, so do it now
313 	 */
314 	kpreempt_disable();
315 	if (ttolwp(curthread) == lwp && fpu_exists) {
316 		fp->fpu_fprs = _fp_read_fprs();
317 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
318 			_fp_write_fprs(fprs);
319 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
320 		}
321 		save_gsr(fp);
322 	}
323 	gsr = get_gsr(fp);
324 	kpreempt_enable();
325 	PRXREG_GSR(xregs) = gsr;
326 }
327 
/*
 * set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input
 *
 * This implementation is empty: neither 'lwp' nor 'xrp' is touched.
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}
338 
339 /*
340  * set the specified lwp's platform-dependent floating-point
341  * extra register state based on the specified input
342  */
343 void
344 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
345 {
346 	prxregset_t *xregs = (prxregset_t *)xrp;
347 	kfpu_t *fp = lwptofpu(lwp);
348 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
349 	uint64_t gsr = PRXREG_GSR(xregs);
350 
351 	kpreempt_disable();
352 	set_gsr(gsr, lwptofpu(lwp));
353 
354 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
355 		fp->fpu_fprs = _fp_read_fprs();
356 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
357 			_fp_write_fprs(fprs);
358 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
359 		}
360 		restore_gsr(lwptofpu(lwp));
361 	}
362 	kpreempt_enable();
363 }
364 
/*
 * fill in the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information
 *
 * This implementation is empty: 'asr' is left unmodified.
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
375 
376 /*
377  * fill in the sun4u asrs, ie, the lwp's platform-dependent
378  * floating-point extra register state information
379  */
380 void
381 getfpasrs(klwp_t *lwp, asrset_t asr)
382 {
383 	kfpu_t *fp = lwptofpu(lwp);
384 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
385 
386 	kpreempt_disable();
387 	if (ttolwp(curthread) == lwp)
388 		fp->fpu_fprs = _fp_read_fprs();
389 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
390 		if (fpu_exists && ttolwp(curthread) == lwp) {
391 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
392 				_fp_write_fprs(fprs);
393 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
394 			}
395 			save_gsr(fp);
396 		}
397 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
398 	}
399 	kpreempt_enable();
400 }
401 
/*
 * set the sun4u asrs, ie, the lwp's platform-dependent
 * non-floating-point extra register state information
 *
 * This implementation is empty: nothing is read from 'asr'.
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}
412 
413 void
414 setfpasrs(klwp_t *lwp, asrset_t asr)
415 {
416 	kfpu_t *fp = lwptofpu(lwp);
417 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
418 
419 	kpreempt_disable();
420 	if (ttolwp(curthread) == lwp)
421 		fp->fpu_fprs = _fp_read_fprs();
422 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
423 		set_gsr(asr[ASR_GSR], fp);
424 		if (fpu_exists && ttolwp(curthread) == lwp) {
425 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
426 				_fp_write_fprs(fprs);
427 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
428 			}
429 			restore_gsr(fp);
430 		}
431 	}
432 	kpreempt_enable();
433 }
434 
435 /*
436  * Create interrupt kstats for this CPU.
437  */
438 void
439 cpu_create_intrstat(cpu_t *cp)
440 {
441 	int		i;
442 	kstat_t		*intr_ksp;
443 	kstat_named_t	*knp;
444 	char		name[KSTAT_STRLEN];
445 	zoneid_t	zoneid;
446 
447 	ASSERT(MUTEX_HELD(&cpu_lock));
448 
449 	if (pool_pset_enabled())
450 		zoneid = GLOBAL_ZONEID;
451 	else
452 		zoneid = ALL_ZONES;
453 
454 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
455 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
456 
457 	/*
458 	 * Initialize each PIL's named kstat
459 	 */
460 	if (intr_ksp != NULL) {
461 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
462 		knp = (kstat_named_t *)intr_ksp->ks_data;
463 		intr_ksp->ks_private = cp;
464 		for (i = 0; i < PIL_MAX; i++) {
465 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
466 			    i + 1);
467 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
468 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
469 			    i + 1);
470 			kstat_named_init(&knp[(i * 2) + 1], name,
471 			    KSTAT_DATA_UINT64);
472 		}
473 		kstat_install(intr_ksp);
474 	}
475 }
476 
/*
 * Delete interrupt kstats for this CPU.
 *
 * The kstat is looked up by name ("cpu" module, instance cp->cpu_id,
 * name "intrstat") with ALL_ZONES as the zone argument.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}
485 
486 /*
487  * Convert interrupt statistics from CPU ticks to nanoseconds and
488  * update kstat.
489  */
490 int
491 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
492 {
493 	kstat_named_t	*knp = ksp->ks_data;
494 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
495 	int		i;
496 
497 	if (rw == KSTAT_WRITE)
498 		return (EACCES);
499 
500 	/*
501 	 * We use separate passes to copy and convert the statistics to
502 	 * nanoseconds. This assures that the snapshot of the data is as
503 	 * self-consistent as possible.
504 	 */
505 
506 	for (i = 0; i < PIL_MAX; i++) {
507 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
508 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
509 	}
510 
511 	for (i = 0; i < PIL_MAX; i++) {
512 		knp[i * 2].value.ui64 =
513 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
514 		    cpup->cpu_id);
515 	}
516 
517 	return (0);
518 }
519 
/*
 * Called by common/os/cpu.c for psrinfo(1m) kstats
 *
 * Returns the fru_fmri member of the cpunodes[] entry indexed by
 * cp->cpu_id; the string is not copied.
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}
528 
529 /*
530  * An interrupt thread is ending a time slice, so compute the interval it
531  * ran for and update the statistic for its PIL.
532  */
533 void
534 cpu_intr_swtch_enter(kthread_id_t t)
535 {
536 	uint64_t	interval;
537 	uint64_t	start;
538 	cpu_t		*cpu;
539 
540 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
541 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
542 
543 	/*
544 	 * We could be here with a zero timestamp. This could happen if:
545 	 * an interrupt thread which no longer has a pinned thread underneath
546 	 * it (i.e. it blocked at some point in its past) has finished running
547 	 * its handler. intr_thread() updated the interrupt statistic for its
548 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
549 	 * return to, swtch() gets called and we end up here.
550 	 *
551 	 * It can also happen if an interrupt thread in intr_thread() calls
552 	 * preempt. It will have already taken care of updating stats. In
553 	 * this event, the interrupt thread will be runnable.
554 	 */
555 	if (t->t_intr_start) {
556 		do {
557 			start = t->t_intr_start;
558 			interval = CLOCK_TICK_COUNTER() - start;
559 		} while (atomic_cas_64(&t->t_intr_start, start, 0) != start);
560 		cpu = CPU;
561 		if (cpu->cpu_m.divisor > 1)
562 			interval *= cpu->cpu_m.divisor;
563 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
564 
565 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
566 		    interval);
567 	} else
568 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
569 }
570 
571 
572 /*
573  * An interrupt thread is returning from swtch(). Place a starting timestamp
574  * in its thread structure.
575  */
576 void
577 cpu_intr_swtch_exit(kthread_id_t t)
578 {
579 	uint64_t ts;
580 
581 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
582 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
583 
584 	do {
585 		ts = t->t_intr_start;
586 	} while (atomic_cas_64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) !=
587 	    ts);
588 }
589 
590 
591 int
592 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
593 {
594 	if (&plat_blacklist)
595 		return (plat_blacklist(cmd, scheme, fmri, class));
596 
597 	return (ENOTSUP);
598 }
599 
600 int
601 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
602 {
603 	extern void kdi_flush_caches(void);
604 	size_t nread = 0;
605 	uint32_t word;
606 	int slop, i;
607 
608 	kdi_flush_caches();
609 	membar_enter();
610 
611 	/* We might not begin on a word boundary. */
612 	if ((slop = addr & 3) != 0) {
613 		word = ldphys(addr & ~3);
614 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
615 			*buf++ = ((uchar_t *)&word)[i];
616 		addr = roundup(addr, 4);
617 	}
618 
619 	while (nbytes > 0) {
620 		word = ldphys(addr);
621 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
622 			*buf++ = ((uchar_t *)&word)[i];
623 	}
624 
625 	kdi_flush_caches();
626 
627 	*ncopiedp = nread;
628 	return (0);
629 }
630 
631 int
632 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
633 {
634 	extern void kdi_flush_caches(void);
635 	size_t nwritten = 0;
636 	uint32_t word;
637 	int slop, i;
638 
639 	kdi_flush_caches();
640 
641 	/* We might not begin on a word boundary. */
642 	if ((slop = addr & 3) != 0) {
643 		word = ldphys(addr & ~3);
644 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
645 			((uchar_t *)&word)[i] = *buf++;
646 		stphys(addr & ~3, word);
647 		addr = roundup(addr, 4);
648 	}
649 
650 	while (nbytes > 3) {
651 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
652 			((uchar_t *)&word)[i] = *buf++;
653 		stphys(addr, word);
654 		addr += 4;
655 	}
656 
657 	/* We might not end with a whole word. */
658 	if (nbytes > 0) {
659 		word = ldphys(addr);
660 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
661 			((uchar_t *)&word)[i] = *buf++;
662 		stphys(addr, word);
663 	}
664 
665 	membar_enter();
666 	kdi_flush_caches();
667 
668 	*ncopiedp = nwritten;
669 	return (0);
670 }
671 
/*
 * Copy the supplied register set into sync_reg_buf, record the trap type
 * in sync_tt, and then call sync_handler().  Installed as the
 * mkdi_kernpanic callback by mach_kdi_init().
 */
static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}
680 
681 static void
682 kdi_plat_call(void (*platfn)(void))
683 {
684 	if (platfn != NULL) {
685 		prom_suspend_prepost();
686 		platfn();
687 		prom_resume_prepost();
688 	}
689 }
690 
691 /*
692  * kdi_system_claim and release are defined here for all sun4 platforms and
693  * pointed to by mach_kdi_init() to provide default callbacks for such systems.
694  * Specific sun4u or sun4v platforms may implement their own claim and release
695  * routines, at which point their respective callbacks will be updated.
696  */
/* Default claim callback: calls lbolt_debug_entry() and nothing else. */
static void
kdi_system_claim(void)
{
	lbolt_debug_entry();
}
702 
/* Default release callback: calls lbolt_debug_return() and nothing else. */
static void
kdi_system_release(void)
{
	lbolt_debug_return();
}
708 
/*
 * Fill in the machine-dependent callback slots of the kdi_t handed in by
 * the caller.  Every assignment is unconditional; no other field of
 * 'kdi' is read or written here.
 */
void
mach_kdi_init(kdi_t *kdi)
{
	kdi->kdi_plat_call = kdi_plat_call;
	kdi->kdi_kmdb_enter = kmdb_enter;
	/* default claim/release callbacks defined in this file */
	kdi->pkdi_system_claim = kdi_system_claim;
	kdi->pkdi_system_release = kdi_system_release;
	kdi->mkdi_cpu_index = kdi_cpu_index;
	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
	kdi->mkdi_kernpanic = kdi_kernpanic;
}
720 
721 
722 /*
723  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
724  * long, and it fills in the array with the time spent on cpu in
725  * each of the mstates, where time is returned in nsec.
726  *
727  * No guarantee is made that the returned values in times[] will
728  * monotonically increase on sequential calls, although this will
729  * be true in the long run. Any such guarantee must be handled by
730  * the caller, if needed. This can happen if we fail to account
731  * for elapsed time due to a generation counter conflict, yet we
732  * did account for it on a prior call (see below).
733  *
734  * The complication is that the cpu in question may be updating
735  * its microstate at the same time that we are reading it.
736  * Because the microstate is only updated when the CPU's state
737  * changes, the values in cpu_intracct[] can be indefinitely out
738  * of date. To determine true current values, it is necessary to
739  * compare the current time with cpu_mstate_start, and add the
740  * difference to times[cpu_mstate].
741  *
742  * This can be a problem if those values are changing out from
743  * under us. Because the code path in new_cpu_mstate() is
744  * performance critical, we have not added a lock to it. Instead,
745  * we have added a generation counter. Before beginning
746  * modifications, the counter is set to 0. After modifications,
747  * it is set to the old value plus one.
748  *
749  * get_cpu_mstate() will not consider the values of cpu_mstate
750  * and cpu_mstate_start to be usable unless the value of
751  * cpu_mstate_gen is both non-zero and unchanged, both before and
752  * after reading the mstate information. Note that we must
753  * protect against out-of-order loads around accesses to the
754  * generation counter. Also, this is a best effort approach in
755  * that we do not retry should the counter be found to have
756  * changed.
757  *
758  * cpu_intracct[] is used to identify time spent in each CPU
759  * mstate while handling interrupts. Such time should be reported
760  * against system time, and so is subtracted out from its
761  * corresponding cpu_acct[] time and added to
762  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
763  * %ticks, but acct time may be stored as %sticks, thus requiring
764  * different conversions before they can be compared.
765  */
766 
767 void
768 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
769 {
770 	int i;
771 	hrtime_t now, start;
772 	uint16_t gen;
773 	uint16_t state;
774 	hrtime_t intracct[NCMSTATES];
775 
776 	/*
777 	 * Load all volatile state under the protection of membar.
778 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
779 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
780 	 * arrives after we make our last check of cpu_mstate_gen.
781 	 */
782 
783 	now = gethrtime_unscaled();
784 	gen = cpu->cpu_mstate_gen;
785 
786 	membar_consumer();	/* guarantee load ordering */
787 	start = cpu->cpu_mstate_start;
788 	state = cpu->cpu_mstate;
789 	for (i = 0; i < NCMSTATES; i++) {
790 		intracct[i] = cpu->cpu_intracct[i];
791 		times[i] = cpu->cpu_acct[i];
792 	}
793 	membar_consumer();	/* guarantee load ordering */
794 
795 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
796 		times[state] += now - start;
797 
798 	for (i = 0; i < NCMSTATES; i++) {
799 		scalehrtime(&times[i]);
800 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
801 	}
802 
803 	for (i = 0; i < NCMSTATES; i++) {
804 		if (i == CMS_SYSTEM)
805 			continue;
806 		times[i] -= intracct[i];
807 		if (times[i] < 0) {
808 			intracct[i] += times[i];
809 			times[i] = 0;
810 		}
811 		times[CMS_SYSTEM] += intracct[i];
812 	}
813 }
814 
/*
 * Park the calling CPU: publish PAUSE_WAIT through 'safe', then spin
 * (issuing SMT_PAUSE() on each iteration) until '*safe' reads back as
 * PAUSE_IDLE.
 */
void
mach_cpu_pause(volatile char *safe)
{
	/*
	 * This cpu is now safe.
	 */
	*safe = PAUSE_WAIT;
	membar_enter(); /* make sure stores are flushed */

	/*
	 * Now we wait.  When we are allowed to continue, safe
	 * will be set to PAUSE_IDLE.
	 */
	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();
}
831 
/*
 * Always fails with ENOTSUP; 'uio' and 'rw' are ignored.
 */
/*ARGSUSED*/
int
plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
{
	return (ENOTSUP);
}
838 
/*
 * cpu threshold for compressed dumps
 *
 * The initial value is chosen at compile time: the sun4v constant when
 * 'sun4v' is defined, the sun4u constant otherwise.
 */
#ifdef sun4v
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4V_MINCPU;
#else
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4U_MINCPU;
#endif
845 
/*
 * Platform dump hook; this implementation does nothing.
 *
 * Declared with an explicit (void) parameter list so that the definition
 * is a real prototype and a call with arguments is diagnosed.
 *
 * Returns: always 0.
 */
int
dump_plat_addr(void)
{
	return (0);
}
851 
/*
 * Platform dump hook; this implementation does nothing.
 *
 * Declared with an explicit (void) parameter list so that the definition
 * is a real prototype and a call with arguments is diagnosed.
 */
void
dump_plat_pfn(void)
{
}
856 
/*
 * Platform dump hook; this implementation ignores 'dump_cdata' and
 * always returns 0.
 */
/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
	return (0);
}
863 
/*
 * Always reports PLAT_HOLD_OK.  None of the arguments is examined and
 * nothing is stored through 'pp_ret'.
 */
/* ARGSUSED */
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
	return (PLAT_HOLD_OK);
}
870 
/*
 * Empty counterpart of plat_hold_page(): 'pp' is ignored.
 */
/* ARGSUSED */
void
plat_release_page(page_t *pp)
{
}
876 
/*
 * Empty implementation: 'li' is ignored and nothing is done.
 */
/* ARGSUSED */
void
progressbar_key_abort(ldi_ident_t li)
{
}
882 
/*
 * We need to post a soft interrupt to reprogram the lbolt cyclic when
 * switching from event to cyclic driven lbolt. The following code adds
 * and posts the softint for sun4 platforms.
 */
/* value returned by add_softintr(); set in lbolt_softint_add() */
static uint64_t lbolt_softint_inum;
889 
/*
 * Register lbolt_ev_to_cyclic() as a soft interrupt handler at
 * LOCK_LEVEL (NULL argument, SOFTINT_MT flag) and remember the value
 * add_softintr() returns in lbolt_softint_inum.
 */
void
lbolt_softint_add(void)
{
	lbolt_softint_inum = add_softintr(LOCK_LEVEL,
	    (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
}
896 
/*
 * Post the soft interrupt identified by lbolt_softint_inum.
 * NOTE(review): presumably lbolt_softint_add() must have run first so
 * that the number is valid -- confirm against callers.
 */
void
lbolt_softint_post(void)
{
	setsoftint(lbolt_softint_inum);
}
902