xref: /titanic_51/usr/src/uts/sun4/os/machdep.c (revision 3f1de28d23e9724b017260ef6b282b278b6e38f9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/kstat.h>
27 #include <sys/param.h>
28 #include <sys/stack.h>
29 #include <sys/regset.h>
30 #include <sys/thread.h>
31 #include <sys/proc.h>
32 #include <sys/procfs_isa.h>
33 #include <sys/kmem.h>
34 #include <sys/cpuvar.h>
35 #include <sys/systm.h>
36 #include <sys/machpcb.h>
37 #include <sys/machasi.h>
38 #include <sys/vis.h>
39 #include <sys/fpu/fpusystm.h>
40 #include <sys/cpu_module.h>
41 #include <sys/privregs.h>
42 #include <sys/archsystm.h>
43 #include <sys/atomic.h>
44 #include <sys/cmn_err.h>
45 #include <sys/time.h>
46 #include <sys/clock.h>
47 #include <sys/cmp.h>
48 #include <sys/platform_module.h>
49 #include <sys/bl.h>
50 #include <sys/nvpair.h>
51 #include <sys/kdi_impl.h>
52 #include <sys/machsystm.h>
53 #include <sys/sysmacros.h>
54 #include <sys/promif.h>
55 #include <sys/pool_pset.h>
56 #include <sys/mem.h>
57 #include <sys/dumphdr.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/hold_page.h>
60 #include <sys/cpu.h>
61 #include <sys/ivintr.h>
62 #include <sys/clock_impl.h>
63 #include <sys/machclock.h>
64 
65 int maxphys = MMU_PAGESIZE * 16;	/* 128k */
66 int klustsize = MMU_PAGESIZE * 16;	/* 128k */
67 
68 /*
69  * Initialize kernel thread's stack.
70  */
71 caddr_t
72 thread_stk_init(caddr_t stk)
73 {
74 	kfpu_t *fp;
75 	ulong_t align;
76 
77 	/* allocate extra space for floating point state */
78 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
79 	align = (uintptr_t)stk & 0x3f;
80 	stk -= align;		/* force v9_fpu to be 16 byte aligned */
81 	fp = (kfpu_t *)stk;
82 	fp->fpu_fprs = 0;
83 
84 	stk -= SA(MINFRAME);
85 	return (stk);
86 }
87 
88 #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
89 #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
90 
91 kmem_cache_t	*wbuf32_cache;
92 kmem_cache_t	*wbuf64_cache;
93 
94 void
95 lwp_stk_cache_init(void)
96 {
97 	/*
98 	 * Window buffers are allocated from the static arena
99 	 * because they are accessed at TL>0. We also must use
100 	 * KMC_NOHASH to prevent them from straddling page
101 	 * boundaries as they are accessed by physical address.
102 	 */
103 	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
104 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
105 	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
106 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
107 }
108 
109 /*
110  * Initialize lwp's kernel stack.
111  * Note that now that the floating point register save area (kfpu_t)
112  * has been broken out from machpcb and aligned on a 64 byte boundary so that
113  * we can do block load/stores to/from it, there are a couple of potential
114  * optimizations to save stack space. 1. The floating point register save
115  * area could be aligned on a 16 byte boundary, and the floating point code
116  * changed to (a) check the alignment and (b) use different save/restore
117  * macros depending upon the alignment. 2. The lwp_stk_init code below
118  * could be changed to calculate if less space would be wasted if machpcb
119  * was first instead of second. However there is a REGOFF macro used in
120  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
121  * register area is a fixed distance from the %sp, and would have to be
122  * changed to a pointer or something...JJ said later.
123  */
124 caddr_t
125 lwp_stk_init(klwp_t *lwp, caddr_t stk)
126 {
127 	struct machpcb *mpcb;
128 	kfpu_t *fp;
129 	uintptr_t aln;
130 
131 	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
132 	aln = (uintptr_t)stk & 0x3F;
133 	stk -= aln;
134 	fp = (kfpu_t *)stk;
135 	stk -= SA(sizeof (struct machpcb));
136 	mpcb = (struct machpcb *)stk;
137 	bzero(mpcb, sizeof (struct machpcb));
138 	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
139 	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
140 	lwp->lwp_fpu = (void *)fp;
141 	mpcb->mpcb_fpu = fp;
142 	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
143 	mpcb->mpcb_thread = lwp->lwp_thread;
144 	mpcb->mpcb_wbcnt = 0;
145 	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
146 		mpcb->mpcb_wstate = WSTATE_USER32;
147 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
148 	} else {
149 		mpcb->mpcb_wstate = WSTATE_USER64;
150 		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
151 	}
152 	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
153 	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
154 	mpcb->mpcb_pa = va_to_pa(mpcb);
155 	return (stk);
156 }
157 
158 void
159 lwp_stk_fini(klwp_t *lwp)
160 {
161 	struct machpcb *mpcb = lwptompcb(lwp);
162 
163 	/*
164 	 * there might be windows still in the wbuf due to unmapped
165 	 * stack, misaligned stack pointer, etc.  We just free it.
166 	 */
167 	mpcb->mpcb_wbcnt = 0;
168 	if (mpcb->mpcb_wstate == WSTATE_USER32)
169 		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
170 	else
171 		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
172 	mpcb->mpcb_wbuf = NULL;
173 	mpcb->mpcb_wbuf_pa = -1;
174 }
175 
176 
177 /*
178  * Copy regs from parent to child.
179  */
180 void
181 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
182 {
183 	kthread_t *t, *pt = lwptot(lwp);
184 	struct machpcb *mpcb = lwptompcb(clwp);
185 	struct machpcb *pmpcb = lwptompcb(lwp);
186 	kfpu_t *fp, *pfp = lwptofpu(lwp);
187 	caddr_t wbuf;
188 	uint_t wstate;
189 
190 	t = mpcb->mpcb_thread;
191 	/*
192 	 * remember child's fp and wbuf since they will get erased during
193 	 * the bcopy.
194 	 */
195 	fp = mpcb->mpcb_fpu;
196 	wbuf = mpcb->mpcb_wbuf;
197 	wstate = mpcb->mpcb_wstate;
198 	/*
199 	 * Don't copy mpcb_frame since we hand-crafted it
200 	 * in thread_load().
201 	 */
202 	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
203 	mpcb->mpcb_thread = t;
204 	mpcb->mpcb_fpu = fp;
205 	fp->fpu_q = mpcb->mpcb_fpu_q;
206 
207 	/*
208 	 * It is theoretically possibly for the lwp's wstate to
209 	 * be different from its value assigned in lwp_stk_init,
210 	 * since lwp_stk_init assumed the data model of the process.
211 	 * Here, we took on the data model of the cloned lwp.
212 	 */
213 	if (mpcb->mpcb_wstate != wstate) {
214 		if (wstate == WSTATE_USER32) {
215 			kmem_cache_free(wbuf32_cache, wbuf);
216 			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
217 			wstate = WSTATE_USER64;
218 		} else {
219 			kmem_cache_free(wbuf64_cache, wbuf);
220 			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
221 			wstate = WSTATE_USER32;
222 		}
223 	}
224 
225 	mpcb->mpcb_pa = va_to_pa(mpcb);
226 	mpcb->mpcb_wbuf = wbuf;
227 	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
228 
229 	ASSERT(mpcb->mpcb_wstate == wstate);
230 
231 	if (mpcb->mpcb_wbcnt != 0) {
232 		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
233 		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
234 		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
235 	}
236 
237 	if (pt == curthread)
238 		pfp->fpu_fprs = _fp_read_fprs();
239 	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
240 		if (pt == curthread && fpu_exists) {
241 			save_gsr(clwp->lwp_fpu);
242 		} else {
243 			uint64_t gsr;
244 			gsr = get_gsr(lwp->lwp_fpu);
245 			set_gsr(gsr, clwp->lwp_fpu);
246 		}
247 		fp_fork(lwp, clwp);
248 	}
249 }
250 
251 /*
252  * Free lwp fpu regs.
253  */
254 void
255 lwp_freeregs(klwp_t *lwp, int isexec)
256 {
257 	kfpu_t *fp = lwptofpu(lwp);
258 
259 	if (lwptot(lwp) == curthread)
260 		fp->fpu_fprs = _fp_read_fprs();
261 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
262 		fp_free(fp, isexec);
263 }
264 
265 /*
266  * These function are currently unused on sparc.
267  */
268 /*ARGSUSED*/
269 void
270 lwp_attach_brand_hdlrs(klwp_t *lwp)
271 {}
272 
273 /*ARGSUSED*/
274 void
275 lwp_detach_brand_hdlrs(klwp_t *lwp)
276 {}
277 
278 /*
279  * fill in the extra register state area specified with the
280  * specified lwp's platform-dependent non-floating-point extra
281  * register state information
282  */
283 /* ARGSUSED */
284 void
285 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
286 {
287 	/* for sun4u nothing to do here, added for symmetry */
288 }
289 
290 /*
291  * fill in the extra register state area specified with the specified lwp's
292  * platform-dependent floating-point extra register state information.
293  * NOTE:  'lwp' might not correspond to 'curthread' since this is
294  * called from code in /proc to get the registers of another lwp.
295  */
296 void
297 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
298 {
299 	prxregset_t *xregs = (prxregset_t *)xrp;
300 	kfpu_t *fp = lwptofpu(lwp);
301 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
302 	uint64_t gsr;
303 
304 	/*
305 	 * fp_fksave() does not flush the GSR register into
306 	 * the lwp area, so do it now
307 	 */
308 	kpreempt_disable();
309 	if (ttolwp(curthread) == lwp && fpu_exists) {
310 		fp->fpu_fprs = _fp_read_fprs();
311 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
312 			_fp_write_fprs(fprs);
313 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
314 		}
315 		save_gsr(fp);
316 	}
317 	gsr = get_gsr(fp);
318 	kpreempt_enable();
319 	PRXREG_GSR(xregs) = gsr;
320 }
321 
322 /*
323  * set the specified lwp's platform-dependent non-floating-point
324  * extra register state based on the specified input
325  */
326 /* ARGSUSED */
327 void
328 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
329 {
330 	/* for sun4u nothing to do here, added for symmetry */
331 }
332 
333 /*
334  * set the specified lwp's platform-dependent floating-point
335  * extra register state based on the specified input
336  */
337 void
338 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
339 {
340 	prxregset_t *xregs = (prxregset_t *)xrp;
341 	kfpu_t *fp = lwptofpu(lwp);
342 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
343 	uint64_t gsr = PRXREG_GSR(xregs);
344 
345 	kpreempt_disable();
346 	set_gsr(gsr, lwptofpu(lwp));
347 
348 	if ((lwp == ttolwp(curthread)) && fpu_exists) {
349 		fp->fpu_fprs = _fp_read_fprs();
350 		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
351 			_fp_write_fprs(fprs);
352 			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
353 		}
354 		restore_gsr(lwptofpu(lwp));
355 	}
356 	kpreempt_enable();
357 }
358 
359 /*
360  * fill in the sun4u asrs, ie, the lwp's platform-dependent
361  * non-floating-point extra register state information
362  */
363 /* ARGSUSED */
364 void
365 getasrs(klwp_t *lwp, asrset_t asr)
366 {
367 	/* for sun4u nothing to do here, added for symmetry */
368 }
369 
370 /*
371  * fill in the sun4u asrs, ie, the lwp's platform-dependent
372  * floating-point extra register state information
373  */
374 void
375 getfpasrs(klwp_t *lwp, asrset_t asr)
376 {
377 	kfpu_t *fp = lwptofpu(lwp);
378 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
379 
380 	kpreempt_disable();
381 	if (ttolwp(curthread) == lwp)
382 		fp->fpu_fprs = _fp_read_fprs();
383 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
384 		if (fpu_exists && ttolwp(curthread) == lwp) {
385 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
386 				_fp_write_fprs(fprs);
387 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
388 			}
389 			save_gsr(fp);
390 		}
391 		asr[ASR_GSR] = (int64_t)get_gsr(fp);
392 	}
393 	kpreempt_enable();
394 }
395 
396 /*
397  * set the sun4u asrs, ie, the lwp's platform-dependent
398  * non-floating-point extra register state information
399  */
400 /* ARGSUSED */
401 void
402 setasrs(klwp_t *lwp, asrset_t asr)
403 {
404 	/* for sun4u nothing to do here, added for symmetry */
405 }
406 
407 void
408 setfpasrs(klwp_t *lwp, asrset_t asr)
409 {
410 	kfpu_t *fp = lwptofpu(lwp);
411 	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
412 
413 	kpreempt_disable();
414 	if (ttolwp(curthread) == lwp)
415 		fp->fpu_fprs = _fp_read_fprs();
416 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
417 		set_gsr(asr[ASR_GSR], fp);
418 		if (fpu_exists && ttolwp(curthread) == lwp) {
419 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
420 				_fp_write_fprs(fprs);
421 				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
422 			}
423 			restore_gsr(fp);
424 		}
425 	}
426 	kpreempt_enable();
427 }
428 
429 /*
430  * Create interrupt kstats for this CPU.
431  */
432 void
433 cpu_create_intrstat(cpu_t *cp)
434 {
435 	int		i;
436 	kstat_t		*intr_ksp;
437 	kstat_named_t	*knp;
438 	char		name[KSTAT_STRLEN];
439 	zoneid_t	zoneid;
440 
441 	ASSERT(MUTEX_HELD(&cpu_lock));
442 
443 	if (pool_pset_enabled())
444 		zoneid = GLOBAL_ZONEID;
445 	else
446 		zoneid = ALL_ZONES;
447 
448 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
449 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
450 
451 	/*
452 	 * Initialize each PIL's named kstat
453 	 */
454 	if (intr_ksp != NULL) {
455 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
456 		knp = (kstat_named_t *)intr_ksp->ks_data;
457 		intr_ksp->ks_private = cp;
458 		for (i = 0; i < PIL_MAX; i++) {
459 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
460 			    i + 1);
461 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
462 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
463 			    i + 1);
464 			kstat_named_init(&knp[(i * 2) + 1], name,
465 			    KSTAT_DATA_UINT64);
466 		}
467 		kstat_install(intr_ksp);
468 	}
469 }
470 
471 /*
472  * Delete interrupt kstats for this CPU.
473  */
474 void
475 cpu_delete_intrstat(cpu_t *cp)
476 {
477 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
478 }
479 
480 /*
481  * Convert interrupt statistics from CPU ticks to nanoseconds and
482  * update kstat.
483  */
484 int
485 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
486 {
487 	kstat_named_t	*knp = ksp->ks_data;
488 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
489 	int		i;
490 
491 	if (rw == KSTAT_WRITE)
492 		return (EACCES);
493 
494 	/*
495 	 * We use separate passes to copy and convert the statistics to
496 	 * nanoseconds. This assures that the snapshot of the data is as
497 	 * self-consistent as possible.
498 	 */
499 
500 	for (i = 0; i < PIL_MAX; i++) {
501 		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
502 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
503 	}
504 
505 	for (i = 0; i < PIL_MAX; i++) {
506 		knp[i * 2].value.ui64 =
507 		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
508 		    cpup->cpu_id);
509 	}
510 
511 	return (0);
512 }
513 
514 /*
515  * Called by common/os/cpu.c for psrinfo(1m) kstats
516  */
517 char *
518 cpu_fru_fmri(cpu_t *cp)
519 {
520 	return (cpunodes[cp->cpu_id].fru_fmri);
521 }
522 
523 /*
524  * An interrupt thread is ending a time slice, so compute the interval it
525  * ran for and update the statistic for its PIL.
526  */
527 void
528 cpu_intr_swtch_enter(kthread_id_t t)
529 {
530 	uint64_t	interval;
531 	uint64_t	start;
532 	cpu_t		*cpu;
533 
534 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
535 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
536 
537 	/*
538 	 * We could be here with a zero timestamp. This could happen if:
539 	 * an interrupt thread which no longer has a pinned thread underneath
540 	 * it (i.e. it blocked at some point in its past) has finished running
541 	 * its handler. intr_thread() updated the interrupt statistic for its
542 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
543 	 * return to, swtch() gets called and we end up here.
544 	 *
545 	 * It can also happen if an interrupt thread in intr_thread() calls
546 	 * preempt. It will have already taken care of updating stats. In
547 	 * this event, the interrupt thread will be runnable.
548 	 */
549 	if (t->t_intr_start) {
550 		do {
551 			start = t->t_intr_start;
552 			interval = CLOCK_TICK_COUNTER() - start;
553 		} while (atomic_cas_64(&t->t_intr_start, start, 0) != start);
554 		cpu = CPU;
555 		if (cpu->cpu_m.divisor > 1)
556 			interval *= cpu->cpu_m.divisor;
557 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
558 
559 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
560 		    interval);
561 	} else
562 		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
563 }
564 
565 
566 /*
567  * An interrupt thread is returning from swtch(). Place a starting timestamp
568  * in its thread structure.
569  */
570 void
571 cpu_intr_swtch_exit(kthread_id_t t)
572 {
573 	uint64_t ts;
574 
575 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
576 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
577 
578 	do {
579 		ts = t->t_intr_start;
580 	} while (atomic_cas_64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) !=
581 	    ts);
582 }
583 
584 
585 int
586 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
587 {
588 	if (&plat_blacklist)
589 		return (plat_blacklist(cmd, scheme, fmri, class));
590 
591 	return (ENOTSUP);
592 }
593 
594 int
595 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
596 {
597 	extern void kdi_flush_caches(void);
598 	size_t nread = 0;
599 	uint32_t word;
600 	int slop, i;
601 
602 	kdi_flush_caches();
603 	membar_enter();
604 
605 	/* We might not begin on a word boundary. */
606 	if ((slop = addr & 3) != 0) {
607 		word = ldphys(addr & ~3);
608 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
609 			*buf++ = ((uchar_t *)&word)[i];
610 		addr = roundup(addr, 4);
611 	}
612 
613 	while (nbytes > 0) {
614 		word = ldphys(addr);
615 		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
616 			*buf++ = ((uchar_t *)&word)[i];
617 	}
618 
619 	kdi_flush_caches();
620 
621 	*ncopiedp = nread;
622 	return (0);
623 }
624 
625 int
626 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
627 {
628 	extern void kdi_flush_caches(void);
629 	size_t nwritten = 0;
630 	uint32_t word;
631 	int slop, i;
632 
633 	kdi_flush_caches();
634 
635 	/* We might not begin on a word boundary. */
636 	if ((slop = addr & 3) != 0) {
637 		word = ldphys(addr & ~3);
638 		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
639 			((uchar_t *)&word)[i] = *buf++;
640 		stphys(addr & ~3, word);
641 		addr = roundup(addr, 4);
642 	}
643 
644 	while (nbytes > 3) {
645 		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
646 			((uchar_t *)&word)[i] = *buf++;
647 		stphys(addr, word);
648 		addr += 4;
649 	}
650 
651 	/* We might not end with a whole word. */
652 	if (nbytes > 0) {
653 		word = ldphys(addr);
654 		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
655 			((uchar_t *)&word)[i] = *buf++;
656 		stphys(addr, word);
657 	}
658 
659 	membar_enter();
660 	kdi_flush_caches();
661 
662 	*ncopiedp = nwritten;
663 	return (0);
664 }
665 
666 static void
667 kdi_kernpanic(struct regs *regs, uint_t tt)
668 {
669 	sync_reg_buf = *regs;
670 	sync_tt = tt;
671 
672 	sync_handler();
673 }
674 
/*
 * Invoke a platform function, if one was supplied, bracketed by
 * prom_suspend_prepost() and prom_resume_prepost().
 */
static void
kdi_plat_call(void (*platfn)(void))
{
	if (platfn == NULL)
		return;

	prom_suspend_prepost();
	platfn();
	prom_resume_prepost();
}
684 
/*
 * kdi_system_claim and kdi_system_release are defined here for every sun4
 * platform; mach_kdi_init() points at them so that such systems get default
 * callbacks.  Specific sun4u or sun4v platforms may implement their own
 * claim and release routines, at which point their respective callbacks
 * will be updated.
 */
static void
kdi_system_claim(void)
{
	/* default claim action: notify the lbolt code of debugger entry */
	lbolt_debug_entry();
}
696 
/*
 * Default release callback for sun4 platforms; it calls
 * lbolt_debug_return().
 */
static void
kdi_system_release(void)
{
	/* default release action: notify the lbolt code of debugger return */
	lbolt_debug_return();
}
702 
703 void
704 mach_kdi_init(kdi_t *kdi)
705 {
706 	kdi->kdi_plat_call = kdi_plat_call;
707 	kdi->kdi_kmdb_enter = kmdb_enter;
708 	kdi->pkdi_system_claim = kdi_system_claim;
709 	kdi->pkdi_system_release = kdi_system_release;
710 	kdi->mkdi_cpu_index = kdi_cpu_index;
711 	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
712 	kdi->mkdi_kernpanic = kdi_kernpanic;
713 }
714 
715 
716 /*
717  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
718  * long, and it fills in the array with the time spent on cpu in
719  * each of the mstates, where time is returned in nsec.
720  *
721  * No guarantee is made that the returned values in times[] will
722  * monotonically increase on sequential calls, although this will
723  * be true in the long run. Any such guarantee must be handled by
724  * the caller, if needed. This can happen if we fail to account
725  * for elapsed time due to a generation counter conflict, yet we
726  * did account for it on a prior call (see below).
727  *
728  * The complication is that the cpu in question may be updating
729  * its microstate at the same time that we are reading it.
730  * Because the microstate is only updated when the CPU's state
731  * changes, the values in cpu_intracct[] can be indefinitely out
732  * of date. To determine true current values, it is necessary to
733  * compare the current time with cpu_mstate_start, and add the
734  * difference to times[cpu_mstate].
735  *
736  * This can be a problem if those values are changing out from
737  * under us. Because the code path in new_cpu_mstate() is
738  * performance critical, we have not added a lock to it. Instead,
739  * we have added a generation counter. Before beginning
740  * modifications, the counter is set to 0. After modifications,
741  * it is set to the old value plus one.
742  *
743  * get_cpu_mstate() will not consider the values of cpu_mstate
744  * and cpu_mstate_start to be usable unless the value of
745  * cpu_mstate_gen is both non-zero and unchanged, both before and
746  * after reading the mstate information. Note that we must
747  * protect against out-of-order loads around accesses to the
748  * generation counter. Also, this is a best effort approach in
749  * that we do not retry should the counter be found to have
750  * changed.
751  *
752  * cpu_intracct[] is used to identify time spent in each CPU
753  * mstate while handling interrupts. Such time should be reported
754  * against system time, and so is subtracted out from its
755  * corresponding cpu_acct[] time and added to
756  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
757  * %ticks, but acct time may be stored as %sticks, thus requiring
758  * different conversions before they can be compared.
759  */
760 
761 void
762 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
763 {
764 	int i;
765 	hrtime_t now, start;
766 	uint16_t gen;
767 	uint16_t state;
768 	hrtime_t intracct[NCMSTATES];
769 
770 	/*
771 	 * Load all volatile state under the protection of membar.
772 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
773 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
774 	 * arrives after we make our last check of cpu_mstate_gen.
775 	 */
776 
777 	now = gethrtime_unscaled();
778 	gen = cpu->cpu_mstate_gen;
779 
780 	membar_consumer();	/* guarantee load ordering */
781 	start = cpu->cpu_mstate_start;
782 	state = cpu->cpu_mstate;
783 	for (i = 0; i < NCMSTATES; i++) {
784 		intracct[i] = cpu->cpu_intracct[i];
785 		times[i] = cpu->cpu_acct[i];
786 	}
787 	membar_consumer();	/* guarantee load ordering */
788 
789 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
790 		times[state] += now - start;
791 
792 	for (i = 0; i < NCMSTATES; i++) {
793 		scalehrtime(&times[i]);
794 		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
795 	}
796 
797 	for (i = 0; i < NCMSTATES; i++) {
798 		if (i == CMS_SYSTEM)
799 			continue;
800 		times[i] -= intracct[i];
801 		if (times[i] < 0) {
802 			intracct[i] += times[i];
803 			times[i] = 0;
804 		}
805 		times[CMS_SYSTEM] += intracct[i];
806 	}
807 }
808 
809 void
810 mach_cpu_pause(volatile char *safe)
811 {
812 	/*
813 	 * This cpu is now safe.
814 	 */
815 	*safe = PAUSE_WAIT;
816 	membar_enter(); /* make sure stores are flushed */
817 
818 	/*
819 	 * Now we wait.  When we are allowed to continue, safe
820 	 * will be set to PAUSE_IDLE.
821 	 */
822 	while (*safe != PAUSE_IDLE)
823 		SMT_PAUSE();
824 }
825 
826 /*ARGSUSED*/
827 int
828 plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
829 {
830 	return (ENOTSUP);
831 }
832 
833 /* cpu threshold for compressed dumps */
834 #ifdef sun4v
835 uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4V_MINCPU;
836 #else
837 uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4U_MINCPU;
838 #endif
839 
/*
 * Dump hook that takes no arguments and always returns 0 in this file.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, matching the other argument-less functions in this file.
 */
int
dump_plat_addr(void)
{
	return (0);
}
845 
/*
 * Dump hook that takes no arguments and does nothing in this file.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, matching the other argument-less functions in this file.
 */
void
dump_plat_pfn(void)
{
}
850 
/*
 * Dump hook that ignores its argument and always returns 0 in this file.
 */
/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
	int rv = 0;

	return (rv);
}
857 
858 /* ARGSUSED */
859 int
860 plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
861 {
862 	return (PLAT_HOLD_OK);
863 }
864 
865 /* ARGSUSED */
866 void
867 plat_release_page(page_t *pp)
868 {
869 }
870 
871 /* ARGSUSED */
872 void
873 progressbar_key_abort(ldi_ident_t li)
874 {
875 }
876 
877 /*
878  * We need to post a soft interrupt to reprogram the lbolt cyclic when
879  * switching from event to cyclic driven lbolt. The following code adds
880  * and posts the softint for sun4 platforms.
881  */
882 static uint64_t lbolt_softint_inum;
883 
884 void
885 lbolt_softint_add(void)
886 {
887 	lbolt_softint_inum = add_softintr(LOCK_LEVEL,
888 	    (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
889 }
890 
891 void
892 lbolt_softint_post(void)
893 {
894 	setsoftint(lbolt_softint_inum);
895 }
896