xref: /titanic_44/usr/src/uts/sun4/os/machdep.c (revision cab25e88571b90083876d2876f790fdf153773a2)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  #include <sys/types.h>
27  #include <sys/kstat.h>
28  #include <sys/param.h>
29  #include <sys/stack.h>
30  #include <sys/regset.h>
31  #include <sys/thread.h>
32  #include <sys/proc.h>
33  #include <sys/procfs_isa.h>
34  #include <sys/kmem.h>
35  #include <sys/cpuvar.h>
36  #include <sys/systm.h>
37  #include <sys/machpcb.h>
38  #include <sys/machasi.h>
39  #include <sys/vis.h>
40  #include <sys/fpu/fpusystm.h>
41  #include <sys/cpu_module.h>
42  #include <sys/privregs.h>
43  #include <sys/archsystm.h>
44  #include <sys/atomic.h>
45  #include <sys/cmn_err.h>
46  #include <sys/time.h>
47  #include <sys/clock.h>
48  #include <sys/cmp.h>
49  #include <sys/platform_module.h>
50  #include <sys/bl.h>
51  #include <sys/nvpair.h>
52  #include <sys/kdi_impl.h>
53  #include <sys/machsystm.h>
54  #include <sys/sysmacros.h>
55  #include <sys/promif.h>
56  #include <sys/pool_pset.h>
57  #include <sys/mem.h>
58  #include <sys/dumphdr.h>
59  #include <vm/seg_kmem.h>
60  #include <sys/hold_page.h>
61  #include <sys/cpu.h>
62  #include <sys/ivintr.h>
63  #include <sys/clock_impl.h>
64  
65  int maxphys = MMU_PAGESIZE * 16;	/* 128k */
66  int klustsize = MMU_PAGESIZE * 16;	/* 128k */
67  
68  /*
69   * Initialize kernel thread's stack.
70   */
71  caddr_t
72  thread_stk_init(caddr_t stk)
73  {
74  	kfpu_t *fp;
75  	ulong_t align;
76  
77  	/* allocate extra space for floating point state */
78  	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
79  	align = (uintptr_t)stk & 0x3f;
80  	stk -= align;		/* force v9_fpu to be 64 byte aligned */
81  	fp = (kfpu_t *)stk;
82  	fp->fpu_fprs = 0;
83  
84  	stk -= SA(MINFRAME);
85  	return (stk);
86  }
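
/*
 * A rough picture of the stack set up above (stack grows downward):
 *
 *	+------------------------------+ <- stk on entry
 *	| kfpu_t + GSR save area       |   SA(sizeof (kfpu_t) + GSR_SIZE)
 *	+------------------------------+ <- fp (64-byte aligned)
 *	| minimum stack frame          |   SA(MINFRAME)
 *	+------------------------------+ <- returned stk
 */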
87  
88  #define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
89  #define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))
90  
91  kmem_cache_t	*wbuf32_cache;
92  kmem_cache_t	*wbuf64_cache;
93  
94  void
95  lwp_stk_cache_init(void)
96  {
97  	/*
98  	 * Window buffers are allocated from the static arena
99  	 * because they are accessed at TL>0. We also must use
100  	 * KMC_NOHASH to prevent them from straddling page
101  	 * boundaries as they are accessed by physical address.
102  	 */
103  	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
104  	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
105  	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
106  	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
107  }
108  
109  /*
110   * Initialize lwp's kernel stack.
111   * Note that now that the floating point register save area (kfpu_t)
112   * has been broken out from machpcb and aligned on a 64 byte boundary so that
113   * we can do block load/stores to/from it, there are a couple of potential
114   * optimizations to save stack space. 1. The floating point register save
115   * area could be aligned on a 16 byte boundary, and the floating point code
116   * changed to (a) check the alignment and (b) use different save/restore
117   * macros depending upon the alignment. 2. The lwp_stk_init code below
118   * could be changed to calculate whether less space would be wasted if machpcb
119   * was placed first instead of second. However, there is a REGOFF macro used in
120   * locore, syscall_trap, machdep and mlsetup that assumes that the saved
121   * register area is a fixed distance from the %sp, and would have to be
122   * changed to a pointer or something... JJ said later.
123   */
124  caddr_t
125  lwp_stk_init(klwp_t *lwp, caddr_t stk)
126  {
127  	struct machpcb *mpcb;
128  	kfpu_t *fp;
129  	uintptr_t aln;
130  
131  	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
132  	aln = (uintptr_t)stk & 0x3F;
133  	stk -= aln;
134  	fp = (kfpu_t *)stk;
135  	stk -= SA(sizeof (struct machpcb));
136  	mpcb = (struct machpcb *)stk;
137  	bzero(mpcb, sizeof (struct machpcb));
138  	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
139  	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
140  	lwp->lwp_fpu = (void *)fp;
141  	mpcb->mpcb_fpu = fp;
142  	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
143  	mpcb->mpcb_thread = lwp->lwp_thread;
144  	mpcb->mpcb_wbcnt = 0;
145  	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
146  		mpcb->mpcb_wstate = WSTATE_USER32;
147  		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
148  	} else {
149  		mpcb->mpcb_wstate = WSTATE_USER64;
150  		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
151  	}
152  	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
153  	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
154  	mpcb->mpcb_pa = va_to_pa(mpcb);
155  	return (stk);
156  }
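
/*
 * A rough picture of the lwp kernel stack set up above (stack grows
 * downward):
 *
 *	+------------------------------+ <- stk on entry
 *	| kfpu_t + GSR save area       |   SA(sizeof (kfpu_t) + GSR_SIZE)
 *	+------------------------------+ <- fp (64-byte aligned)
 *	| struct machpcb               |   SA(sizeof (struct machpcb))
 *	+------------------------------+ <- mpcb == returned stk
 *
 * Keeping machpcb at the bottom is what lets the saved register area sit
 * a fixed distance (REGOFF) from the stack pointer, as noted in the
 * comment above.
 */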
157  
158  void
159  lwp_stk_fini(klwp_t *lwp)
160  {
161  	struct machpcb *mpcb = lwptompcb(lwp);
162  
163  	/*
164  	 * there might be windows still in the wbuf due to unmapped
165  	 * stack, misaligned stack pointer, etc.  We just free it.
166  	 */
167  	mpcb->mpcb_wbcnt = 0;
168  	if (mpcb->mpcb_wstate == WSTATE_USER32)
169  		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
170  	else
171  		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
172  	mpcb->mpcb_wbuf = NULL;
173  	mpcb->mpcb_wbuf_pa = -1;
174  }
175  
176  
177  /*
178   * Copy regs from parent to child.
179   */
180  void
181  lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
182  {
183  	kthread_t *t, *pt = lwptot(lwp);
184  	struct machpcb *mpcb = lwptompcb(clwp);
185  	struct machpcb *pmpcb = lwptompcb(lwp);
186  	kfpu_t *fp, *pfp = lwptofpu(lwp);
187  	caddr_t wbuf;
188  	uint_t wstate;
189  
190  	t = mpcb->mpcb_thread;
191  	/*
192  	 * remember child's fp and wbuf since they will get erased during
193  	 * the bcopy.
194  	 */
195  	fp = mpcb->mpcb_fpu;
196  	wbuf = mpcb->mpcb_wbuf;
197  	wstate = mpcb->mpcb_wstate;
198  	/*
199  	 * Don't copy mpcb_frame since we hand-crafted it
200  	 * in thread_load().
201  	 */
202  	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
203  	mpcb->mpcb_thread = t;
204  	mpcb->mpcb_fpu = fp;
205  	fp->fpu_q = mpcb->mpcb_fpu_q;
206  
207  	/*
208   * It is theoretically possible for the lwp's wstate to
209  	 * be different from its value assigned in lwp_stk_init,
210  	 * since lwp_stk_init assumed the data model of the process.
211   * Here, we take on the data model of the cloned lwp.
212  	 */
213  	if (mpcb->mpcb_wstate != wstate) {
214  		if (wstate == WSTATE_USER32) {
215  			kmem_cache_free(wbuf32_cache, wbuf);
216  			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
217  			wstate = WSTATE_USER64;
218  		} else {
219  			kmem_cache_free(wbuf64_cache, wbuf);
220  			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
221  			wstate = WSTATE_USER32;
222  		}
223  	}
224  
225  	mpcb->mpcb_pa = va_to_pa(mpcb);
226  	mpcb->mpcb_wbuf = wbuf;
227  	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
228  
229  	ASSERT(mpcb->mpcb_wstate == wstate);
230  
231  	if (mpcb->mpcb_wbcnt != 0) {
232  		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
233  		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
234  		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
235  	}
236  
237  	if (pt == curthread)
238  		pfp->fpu_fprs = _fp_read_fprs();
239  	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
240  		if (pt == curthread && fpu_exists) {
241  			save_gsr(clwp->lwp_fpu);
242  		} else {
243  			uint64_t gsr;
244  			gsr = get_gsr(lwp->lwp_fpu);
245  			set_gsr(gsr, clwp->lwp_fpu);
246  		}
247  		fp_fork(lwp, clwp);
248  	}
249  }
250  
251  /*
252   * Free lwp fpu regs.
253   */
254  void
255  lwp_freeregs(klwp_t *lwp, int isexec)
256  {
257  	kfpu_t *fp = lwptofpu(lwp);
258  
259  	if (lwptot(lwp) == curthread)
260  		fp->fpu_fprs = _fp_read_fprs();
261  	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
262  		fp_free(fp, isexec);
263  }
264  
265  /*
266   * These functions are currently unused on sparc.
267   */
268  /*ARGSUSED*/
269  void
270  lwp_attach_brand_hdlrs(klwp_t *lwp)
271  {}
272  
273  /*ARGSUSED*/
274  void
275  lwp_detach_brand_hdlrs(klwp_t *lwp)
276  {}
277  
278  /*
279   * fill in the extra register state area specified with the
280   * specified lwp's platform-dependent non-floating-point extra
281   * register state information
282   */
283  /* ARGSUSED */
284  void
285  xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
286  {
287  	/* for sun4u nothing to do here, added for symmetry */
288  }
289  
290  /*
291   * fill in the extra register state area specified with the specified lwp's
292   * platform-dependent floating-point extra register state information.
293   * NOTE:  'lwp' might not correspond to 'curthread' since this is
294   * called from code in /proc to get the registers of another lwp.
295   */
296  void
297  xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
298  {
299  	prxregset_t *xregs = (prxregset_t *)xrp;
300  	kfpu_t *fp = lwptofpu(lwp);
301  	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
302  	uint64_t gsr;
303  
304  	/*
305  	 * fp_fksave() does not flush the GSR register into
306  	 * the lwp area, so do it now
307  	 */
308  	kpreempt_disable();
309  	if (ttolwp(curthread) == lwp && fpu_exists) {
310  		fp->fpu_fprs = _fp_read_fprs();
311  		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
312  			_fp_write_fprs(fprs);
313  			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
314  		}
315  		save_gsr(fp);
316  	}
317  	gsr = get_gsr(fp);
318  	kpreempt_enable();
319  	PRXREG_GSR(xregs) = gsr;
320  }
321  
322  /*
323   * set the specified lwp's platform-dependent non-floating-point
324   * extra register state based on the specified input
325   */
326  /* ARGSUSED */
327  void
328  xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
329  {
330  	/* for sun4u nothing to do here, added for symmetry */
331  }
332  
333  /*
334   * set the specified lwp's platform-dependent floating-point
335   * extra register state based on the specified input
336   */
337  void
338  xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
339  {
340  	prxregset_t *xregs = (prxregset_t *)xrp;
341  	kfpu_t *fp = lwptofpu(lwp);
342  	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
343  	uint64_t gsr = PRXREG_GSR(xregs);
344  
345  	kpreempt_disable();
346  	set_gsr(gsr, lwptofpu(lwp));
347  
348  	if ((lwp == ttolwp(curthread)) && fpu_exists) {
349  		fp->fpu_fprs = _fp_read_fprs();
350  		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
351  			_fp_write_fprs(fprs);
352  			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
353  		}
354  		restore_gsr(lwptofpu(lwp));
355  	}
356  	kpreempt_enable();
357  }
358  
359  /*
360   * fill in the sun4u asrs, i.e., the lwp's platform-dependent
361   * non-floating-point extra register state information
362   */
363  /* ARGSUSED */
364  void
365  getasrs(klwp_t *lwp, asrset_t asr)
366  {
367  	/* for sun4u nothing to do here, added for symmetry */
368  }
369  
370  /*
371   * fill in the sun4u asrs, i.e., the lwp's platform-dependent
372   * floating-point extra register state information
373   */
374  void
375  getfpasrs(klwp_t *lwp, asrset_t asr)
376  {
377  	kfpu_t *fp = lwptofpu(lwp);
378  	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
379  
380  	kpreempt_disable();
381  	if (ttolwp(curthread) == lwp)
382  		fp->fpu_fprs = _fp_read_fprs();
383  	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
384  		if (fpu_exists && ttolwp(curthread) == lwp) {
385  			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
386  				_fp_write_fprs(fprs);
387  				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
388  			}
389  			save_gsr(fp);
390  		}
391  		asr[ASR_GSR] = (int64_t)get_gsr(fp);
392  	}
393  	kpreempt_enable();
394  }
395  
396  /*
397   * set the sun4u asrs, i.e., the lwp's platform-dependent
398   * non-floating-point extra register state information
399   */
400  /* ARGSUSED */
401  void
402  setasrs(klwp_t *lwp, asrset_t asr)
403  {
404  	/* for sun4u nothing to do here, added for symmetry */
405  }
406  
407  void
408  setfpasrs(klwp_t *lwp, asrset_t asr)
409  {
410  	kfpu_t *fp = lwptofpu(lwp);
411  	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
412  
413  	kpreempt_disable();
414  	if (ttolwp(curthread) == lwp)
415  		fp->fpu_fprs = _fp_read_fprs();
416  	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
417  		set_gsr(asr[ASR_GSR], fp);
418  		if (fpu_exists && ttolwp(curthread) == lwp) {
419  			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
420  				_fp_write_fprs(fprs);
421  				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
422  			}
423  			restore_gsr(fp);
424  		}
425  	}
426  	kpreempt_enable();
427  }
428  
429  /*
430   * Create interrupt kstats for this CPU.
431   */
432  void
433  cpu_create_intrstat(cpu_t *cp)
434  {
435  	int		i;
436  	kstat_t		*intr_ksp;
437  	kstat_named_t	*knp;
438  	char		name[KSTAT_STRLEN];
439  	zoneid_t	zoneid;
440  
441  	ASSERT(MUTEX_HELD(&cpu_lock));
442  
443  	if (pool_pset_enabled())
444  		zoneid = GLOBAL_ZONEID;
445  	else
446  		zoneid = ALL_ZONES;
447  
448  	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
449  	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
450  
451  	/*
452  	 * Initialize each PIL's named kstat
453  	 */
454  	if (intr_ksp != NULL) {
455  		intr_ksp->ks_update = cpu_kstat_intrstat_update;
456  		knp = (kstat_named_t *)intr_ksp->ks_data;
457  		intr_ksp->ks_private = cp;
458  		for (i = 0; i < PIL_MAX; i++) {
459  			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
460  			    i + 1);
461  			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
462  			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
463  			    i + 1);
464  			kstat_named_init(&knp[(i * 2) + 1], name,
465  			    KSTAT_DATA_UINT64);
466  		}
467  		kstat_install(intr_ksp);
468  	}
469  }
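
/*
 * For example, the kstat created above for CPU 0 is cpu:0:intrstat, and
 * its PIL_MAX * 2 named entries are laid out in (time, count) pairs:
 *
 *	knp[0] = level-1-time		knp[1] = level-1-count
 *	knp[2] = level-2-time		knp[3] = level-2-count
 *	... and so on up to level-<PIL_MAX>.
 *
 * cpu_kstat_intrstat_update() below fills in the time entries (converted
 * from ticks to nanoseconds) and the count entries from cpu_stats.sys.intr[].
 */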
470  
471  /*
472   * Delete interrupt kstats for this CPU.
473   */
474  void
475  cpu_delete_intrstat(cpu_t *cp)
476  {
477  	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
478  }
479  
480  /*
481   * Convert interrupt statistics from CPU ticks to nanoseconds and
482   * update kstat.
483   */
484  int
485  cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
486  {
487  	kstat_named_t	*knp = ksp->ks_data;
488  	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
489  	int		i;
490  
491  	if (rw == KSTAT_WRITE)
492  		return (EACCES);
493  
494  	/*
495  	 * We use separate passes to copy and convert the statistics to
496   * nanoseconds. This ensures that the snapshot of the data is as
497  	 * self-consistent as possible.
498  	 */
499  
500  	for (i = 0; i < PIL_MAX; i++) {
501  		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
502  		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
503  	}
504  
505  	for (i = 0; i < PIL_MAX; i++) {
506  		knp[i * 2].value.ui64 =
507  		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
508  		    cpup->cpu_id);
509  	}
510  
511  	return (0);
512  }
513  
514  /*
515   * Called by common/os/cpu.c for psrinfo(1M) kstats
516   */
517  char *
518  cpu_fru_fmri(cpu_t *cp)
519  {
520  	return (cpunodes[cp->cpu_id].fru_fmri);
521  }
522  
523  /*
524   * An interrupt thread is ending a time slice, so compute the interval it
525   * ran for and update the statistic for its PIL.
526   */
527  void
528  cpu_intr_swtch_enter(kthread_id_t t)
529  {
530  	uint64_t	interval;
531  	uint64_t	start;
532  	cpu_t		*cpu;
533  
534  	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
535  	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
536  
537  	/*
538  	 * We could be here with a zero timestamp. This could happen if:
539  	 * an interrupt thread which no longer has a pinned thread underneath
540  	 * it (i.e. it blocked at some point in its past) has finished running
541  	 * its handler. intr_thread() updated the interrupt statistic for its
542  	 * PIL and zeroed its timestamp. Since there was no pinned thread to
543  	 * return to, swtch() gets called and we end up here.
544  	 *
545  	 * It can also happen if an interrupt thread in intr_thread() calls
546  	 * preempt. It will have already taken care of updating stats. In
547  	 * this event, the interrupt thread will be runnable.
548  	 */
549  	if (t->t_intr_start) {
550  		do {
551  			start = t->t_intr_start;
552  			interval = gettick_counter() - start;
553  		} while (cas64(&t->t_intr_start, start, 0) != start);
554  		cpu = CPU;
555  		if (cpu->cpu_m.divisor > 1)
556  			interval *= cpu->cpu_m.divisor;
557  		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
558  
559  		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
560  		    interval);
561  	} else
562  		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
563  }
564  
565  
566  /*
567   * An interrupt thread is returning from swtch(). Place a starting timestamp
568   * in its thread structure.
569   */
570  void
571  cpu_intr_swtch_exit(kthread_id_t t)
572  {
573  	uint64_t ts;
574  
575  	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
576  	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
577  
578  	do {
579  		ts = t->t_intr_start;
580  	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
581  }
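
/*
 * Both routines above update t_intr_start with the same lock-free idiom:
 * read the current value, compute its replacement, and retry the cas64()
 * until t_intr_start has not changed underneath us, roughly
 *
 *	do {
 *		old = t->t_intr_start;
 *	} while (cas64(&t->t_intr_start, old, newval) != old);
 *
 * so no lock is needed on this interrupt accounting path.
 */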
582  
583  
584  int
585  blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
586  {
587  	if (&plat_blacklist)
588  		return (plat_blacklist(cmd, scheme, fmri, class));
589  
590  	return (ENOTSUP);
591  }
592  
593  int
594  kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
595  {
596  	extern void kdi_flush_caches(void);
597  	size_t nread = 0;
598  	uint32_t word;
599  	int slop, i;
600  
601  	kdi_flush_caches();
602  	membar_enter();
603  
604  	/* We might not begin on a word boundary. */
605  	if ((slop = addr & 3) != 0) {
606  		word = ldphys(addr & ~3);
607  		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
608  			*buf++ = ((uchar_t *)&word)[i];
609  		addr = roundup(addr, 4);
610  	}
611  
612  	while (nbytes > 0) {
613  		word = ldphys(addr);
614  		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
615  			*buf++ = ((uchar_t *)&word)[i];
616  	}
617  
618  	kdi_flush_caches();
619  
620  	*ncopiedp = nread;
621  	return (0);
622  }
623  
624  int
625  kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
626  {
627  	extern void kdi_flush_caches(void);
628  	size_t nwritten = 0;
629  	uint32_t word;
630  	int slop, i;
631  
632  	kdi_flush_caches();
633  
634  	/* We might not begin on a word boundary. */
635  	if ((slop = addr & 3) != 0) {
636  		word = ldphys(addr & ~3);
637  		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
638  			((uchar_t *)&word)[i] = *buf++;
639  		stphys(addr & ~3, word);
640  		addr = roundup(addr, 4);
641  	}
642  
643  	while (nbytes > 3) {
644  		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
645  			((uchar_t *)&word)[i] = *buf++;
646  		stphys(addr, word);
647  		addr += 4;
648  	}
649  
650  	/* We might not end with a whole word. */
651  	if (nbytes > 0) {
652  		word = ldphys(addr);
653  		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
654  			((uchar_t *)&word)[i] = *buf++;
655  		stphys(addr, word);
656  	}
657  
658  	membar_enter();
659  	kdi_flush_caches();
660  
661  	*ncopiedp = nwritten;
662  	return (0);
663  }
664  
665  static void
666  kdi_kernpanic(struct regs *regs, uint_t tt)
667  {
668  	sync_reg_buf = *regs;
669  	sync_tt = tt;
670  
671  	sync_handler();
672  }
673  
674  static void
675  kdi_plat_call(void (*platfn)(void))
676  {
677  	if (platfn != NULL) {
678  		prom_suspend_prepost();
679  		platfn();
680  		prom_resume_prepost();
681  	}
682  }
683  
684  /*
685   * kdi_system_claim and release are defined here for all sun4 platforms and
686   * pointed to by mach_kdi_init() to provide default callbacks for such systems.
687   * Specific sun4u or sun4v platforms may implement their own claim and release
688   * routines, at which point their respective callbacks will be updated.
689   */
690  static void
691  kdi_system_claim(void)
692  {
693  	lbolt_debug_entry();
694  }
695  
696  static void
697  kdi_system_release(void)
698  {
699  	lbolt_debug_return();
700  }
701  
702  void
703  mach_kdi_init(kdi_t *kdi)
704  {
705  	kdi->kdi_plat_call = kdi_plat_call;
706  	kdi->kdi_kmdb_enter = kmdb_enter;
707  	kdi->pkdi_system_claim = kdi_system_claim;
708  	kdi->pkdi_system_release = kdi_system_release;
709  	kdi->mkdi_cpu_index = kdi_cpu_index;
710  	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
711  	kdi->mkdi_kernpanic = kdi_kernpanic;
712  }
713  
714  
715  /*
716   * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
717   * long, and it fills in the array with the time spent on cpu in
718   * each of the mstates, where time is returned in nsec.
719   *
720   * No guarantee is made that the returned values in times[] will
721   * monotonically increase on sequential calls, although this will
722   * be true in the long run. Any such guarantee must be handled by
723   * the caller, if needed. This can happen if we fail to account
724   * for elapsed time due to a generation counter conflict, yet we
725   * did account for it on a prior call (see below).
726   *
727   * The complication is that the cpu in question may be updating
728   * its microstate at the same time that we are reading it.
729   * Because the microstate is only updated when the CPU's state
730   * changes, the values in cpu_intracct[] can be indefinitely out
731   * of date. To determine true current values, it is necessary to
732   * compare the current time with cpu_mstate_start, and add the
733   * difference to times[cpu_mstate].
734   *
735   * This can be a problem if those values are changing out from
736   * under us. Because the code path in new_cpu_mstate() is
737   * performance critical, we have not added a lock to it. Instead,
738   * we have added a generation counter. Before beginning
739   * modifications, the counter is set to 0. After modifications,
740   * it is set to the old value plus one.
741   *
742   * get_cpu_mstate() will not consider the values of cpu_mstate
743   * and cpu_mstate_start to be usable unless the value of
744   * cpu_mstate_gen is both non-zero and unchanged, both before and
745   * after reading the mstate information. Note that we must
746   * protect against out-of-order loads around accesses to the
747   * generation counter. Also, this is a best effort approach in
748   * that we do not retry should the counter be found to have
749   * changed.
750   *
751   * cpu_intracct[] is used to identify time spent in each CPU
752   * mstate while handling interrupts. Such time should be reported
753   * against system time, and so is subtracted out from its
754   * corresponding cpu_acct[] time and added to
755   * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
756   * %ticks, but acct time may be stored as %sticks, thus requiring
757   * different conversions before they can be compared.
758   */
759  
760  void
761  get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
762  {
763  	int i;
764  	hrtime_t now, start;
765  	uint16_t gen;
766  	uint16_t state;
767  	hrtime_t intracct[NCMSTATES];
768  
769  	/*
770  	 * Load all volatile state under the protection of membar.
771  	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
772  	 * of (now - cpu_mstate_start) by a change in CPU mstate that
773  	 * arrives after we make our last check of cpu_mstate_gen.
774  	 */
775  
776  	now = gethrtime_unscaled();
777  	gen = cpu->cpu_mstate_gen;
778  
779  	membar_consumer();	/* guarantee load ordering */
780  	start = cpu->cpu_mstate_start;
781  	state = cpu->cpu_mstate;
782  	for (i = 0; i < NCMSTATES; i++) {
783  		intracct[i] = cpu->cpu_intracct[i];
784  		times[i] = cpu->cpu_acct[i];
785  	}
786  	membar_consumer();	/* guarantee load ordering */
787  
788  	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
789  		times[state] += now - start;
790  
791  	for (i = 0; i < NCMSTATES; i++) {
792  		scalehrtime(&times[i]);
793  		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
794  	}
795  
796  	for (i = 0; i < NCMSTATES; i++) {
797  		if (i == CMS_SYSTEM)
798  			continue;
799  		times[i] -= intracct[i];
800  		if (times[i] < 0) {
801  			intracct[i] += times[i];
802  			times[i] = 0;
803  		}
804  		times[CMS_SYSTEM] += intracct[i];
805  	}
806  }
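
/*
 * For reference, the writer side of the generation-counter protocol
 * described in the block comment above lives in new_cpu_mstate() (not in
 * this file); in rough outline it does
 *
 *	gen = cpu->cpu_mstate_gen;
 *	cpu->cpu_mstate_gen = 0;
 *	membar_producer();
 *	... update cpu_mstate, cpu_mstate_start, cpu_acct[], cpu_intracct[] ...
 *	membar_producer();
 *	cpu->cpu_mstate_gen = gen + 1;
 *
 * This is only a sketch; see its definition for the details. A reader here
 * that observes gen == 0, or a generation change between its two loads,
 * simply skips the (now - cpu_mstate_start) adjustment rather than
 * retrying, which is why times[] is only best-effort monotonic.
 */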
807  
808  void
809  mach_cpu_pause(volatile char *safe)
810  {
811  	/*
812  	 * This cpu is now safe.
813  	 */
814  	*safe = PAUSE_WAIT;
815  	membar_enter(); /* make sure stores are flushed */
816  
817  	/*
818  	 * Now we wait.  When we are allowed to continue, safe
819  	 * will be set to PAUSE_IDLE.
820  	 */
821  	while (*safe != PAUSE_IDLE)
822  		SMT_PAUSE();
823  }
824  
825  /*ARGSUSED*/
826  int
827  plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
828  {
829  	return (ENOTSUP);
830  }
831  
832  /* cpu threshold for compressed dumps */
833  #ifdef sun4v
834  uint_t dump_plat_mincpu = DUMP_PLAT_SUN4V_MINCPU;
835  #else
836  uint_t dump_plat_mincpu = DUMP_PLAT_SUN4U_MINCPU;
837  #endif
838  
839  int
840  dump_plat_addr()
841  {
842  	return (0);
843  }
844  
845  void
846  dump_plat_pfn()
847  {
848  }
849  
850  /* ARGSUSED */
851  int
852  dump_plat_data(void *dump_cdata)
853  {
854  	return (0);
855  }
856  
857  /* ARGSUSED */
858  int
859  plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
860  {
861  	return (PLAT_HOLD_OK);
862  }
863  
864  /* ARGSUSED */
865  void
866  plat_release_page(page_t *pp)
867  {
868  }
869  
870  /* ARGSUSED */
871  void
872  progressbar_key_abort(ldi_ident_t li)
873  {
874  }
875  
876  /*
877   * We need to post a soft interrupt to reprogram the lbolt cyclic when
878   * switching from event to cyclic driven lbolt. The following code adds
879   * and posts the softint for sun4 platforms.
880   */
881  static uint64_t lbolt_softint_inum;
882  
883  void
884  lbolt_softint_add(void)
885  {
886  	lbolt_softint_inum = add_softintr(LOCK_LEVEL,
887  	    (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
888  }
889  
890  void
891  lbolt_softint_post(void)
892  {
893  	setsoftint(lbolt_softint_inum);
894  }
895