xref: /titanic_41/usr/src/uts/i86pc/os/machdep.c (revision 68ac2337c38c8af06edcf32a72e42de36ec72a9d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/signal.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/mman.h>
37 #include <sys/vm.h>
38 
39 #include <sys/disp.h>
40 #include <sys/class.h>
41 
42 #include <sys/proc.h>
43 #include <sys/buf.h>
44 #include <sys/kmem.h>
45 
46 #include <sys/reboot.h>
47 #include <sys/uadmin.h>
48 #include <sys/callb.h>
49 
50 #include <sys/cred.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h>
53 
54 #include <sys/procfs.h>
55 #include <sys/acct.h>
56 
57 #include <sys/vfs.h>
58 #include <sys/dnlc.h>
59 #include <sys/var.h>
60 #include <sys/cmn_err.h>
61 #include <sys/utsname.h>
62 #include <sys/debug.h>
63 #include <sys/kdi_impl.h>
64 
65 #include <sys/dumphdr.h>
66 #include <sys/bootconf.h>
67 #include <sys/varargs.h>
68 #include <sys/promif.h>
69 #include <sys/modctl.h>
70 
71 #include <sys/consdev.h>
72 #include <sys/frame.h>
73 
74 #include <sys/sunddi.h>
75 #include <sys/ddidmareq.h>
76 #include <sys/psw.h>
77 #include <sys/regset.h>
78 #include <sys/privregs.h>
79 #include <sys/clock.h>
80 #include <sys/tss.h>
81 #include <sys/cpu.h>
82 #include <sys/stack.h>
83 #include <sys/trap.h>
84 #include <sys/pic.h>
85 #include <sys/mmu.h>
86 #include <vm/hat.h>
87 #include <vm/anon.h>
88 #include <vm/as.h>
89 #include <vm/page.h>
90 #include <vm/seg.h>
91 #include <vm/seg_kmem.h>
92 #include <vm/seg_map.h>
93 #include <vm/seg_vn.h>
94 #include <vm/seg_kp.h>
95 #include <vm/hat_i86.h>
96 #include <sys/swap.h>
97 #include <sys/thread.h>
98 #include <sys/sysconf.h>
99 #include <sys/vm_machparam.h>
100 #include <sys/archsystm.h>
101 #include <sys/machsystm.h>
102 #include <sys/machlock.h>
103 #include <sys/x_call.h>
104 #include <sys/instance.h>
105 
106 #include <sys/time.h>
107 #include <sys/smp_impldefs.h>
108 #include <sys/psm_types.h>
109 #include <sys/atomic.h>
110 #include <sys/panic.h>
111 #include <sys/cpuvar.h>
112 #include <sys/dtrace.h>
113 #include <sys/bl.h>
114 #include <sys/nvpair.h>
115 #include <sys/x86_archext.h>
116 #include <sys/pool_pset.h>
117 #include <sys/autoconf.h>
118 #include <sys/kdi.h>
119 
120 #ifdef	TRAPTRACE
121 #include <sys/traptrace.h>
122 #endif	/* TRAPTRACE */
123 
124 #ifdef C2_AUDIT
125 extern void audit_enterprom(int);
126 extern void audit_exitprom(int);
127 #endif
128 
129 /*
130  * The panicbuf array is used to record messages and state:
131  */
132 char panicbuf[PANICBUFSIZE];
133 
134 /*
135  * maxphys - used during physio
136  * klustsize - used for klustering by swapfs and specfs
137  */
138 int maxphys = 56 * 1024;    /* XXX See vm_subr.c - max b_count in physio */
139 int klustsize = 56 * 1024;
140 
141 caddr_t	p0_va;		/* Virtual address for accessing physical page 0 */
142 
143 /*
144  * defined here, though unused on x86,
145  * to make kstat_fr.c happy.
146  */
147 int vac;
148 
149 void stop_other_cpus();
150 void debug_enter(char *);
151 
152 extern void pm_cfb_check_and_powerup(void);
153 extern void pm_cfb_rele(void);
154 
155 /*
156  * Machine dependent code to reboot.
157  * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
158  * to a string to be used as the argument string when rebooting.
159  *
160  * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
161  * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
162  * we are in a normal shutdown sequence (interrupts are not blocked, the
163  * system is not panic'ing or being suspended).
164  */
165 /*ARGSUSED*/
166 void
167 mdboot(int cmd, int fcn, char *mdep, boolean_t invoke_cb)
168 {
169 	extern void mtrr_resync(void);
170 
171 	if (!panicstr) {
172 		kpreempt_disable();
173 		affinity_set(CPU_CURRENT);
174 	}
175 
176 	/*
177 	 * XXX - rconsvp is set to NULL to ensure that output messages
178 	 * are sent to the underlying "hardware" device using the
179 	 * monitor's printf routine since we are in the process of
180 	 * either rebooting or halting the machine.
181 	 */
182 	rconsvp = NULL;
183 
184 	/*
185 	 * Print the reboot message now, before pausing other cpus.
186 	 * There is a race condition in the printing support that
187 	 * can deadlock multiprocessor machines.
188 	 */
189 	if (!(fcn == AD_HALT || fcn == AD_POWEROFF))
190 		prom_printf("rebooting...\n");
191 
192 	/*
193 	 * We can't bring up the console from above lock level, so do it now
194 	 */
195 	pm_cfb_check_and_powerup();
196 
197 	/* make sure there are no more changes to the device tree */
198 	devtree_freeze();
199 
200 	if (invoke_cb)
201 		(void) callb_execute_class(CB_CL_MDBOOT, NULL);
202 
203 	/*
204 	 * Clear any unresolved UEs from memory.
205 	 */
206 	page_retire_mdboot();
207 
208 	/*
209 	 * stop other cpus and raise our priority.  since there is only
210 	 * one active cpu after this, and our priority will be too high
211 	 * for us to be preempted, we're essentially single threaded
212 	 * from here on out.
213 	 */
214 	(void) spl6();
215 	if (!panicstr) {
216 		mutex_enter(&cpu_lock);
217 		pause_cpus(NULL);
218 		mutex_exit(&cpu_lock);
219 	}
220 
221 	/*
222 	 * try and reset leaf devices.  reset_leaves() should only
223 	 * be called when there are no other threads that could be
224 	 * accessing devices
225 	 */
226 	reset_leaves();
227 
228 	(void) spl8();
229 	(*psm_shutdownf)(cmd, fcn);
230 
231 	mtrr_resync();
232 
233 	if (fcn == AD_HALT || fcn == AD_POWEROFF)
234 		halt((char *)NULL);
235 	else
236 		prom_reboot("");
237 	/*NOTREACHED*/
238 }
239 
/*
 * mdpreboot - forward the request to the PSM pre-shutdown hook.  May be
 * called prior to mdboot while the root fs is still mounted.  mdep is not
 * used.
 */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *mdep)
{
	psm_preshutdownf(cmd, fcn);
}
247 
248 void
249 idle_other_cpus()
250 {
251 	int cpuid = CPU->cpu_id;
252 	cpuset_t xcset;
253 
254 	ASSERT(cpuid < NCPU);
255 	CPUSET_ALL_BUT(xcset, cpuid);
256 	xc_capture_cpus(xcset);
257 }
258 
259 void
260 resume_other_cpus()
261 {
262 	ASSERT(CPU->cpu_id < NCPU);
263 
264 	xc_release_cpus();
265 }
266 
267 extern void	mp_halt(char *);
268 
269 void
270 stop_other_cpus()
271 {
272 	int cpuid = CPU->cpu_id;
273 	cpuset_t xcset;
274 
275 	ASSERT(cpuid < NCPU);
276 
277 	/*
278 	 * xc_trycall will attempt to make all other CPUs execute mp_halt,
279 	 * and will return immediately regardless of whether or not it was
280 	 * able to make them do it.
281 	 */
282 	CPUSET_ALL_BUT(xcset, cpuid);
283 	xc_trycall(NULL, NULL, NULL, xcset, (int (*)())mp_halt);
284 }
285 
286 /*
287  *	Machine dependent abort sequence handling
288  */
289 void
290 abort_sequence_enter(char *msg)
291 {
292 	if (abort_enable == 0) {
293 #ifdef C2_AUDIT
294 		if (audit_active)
295 			audit_enterprom(0);
296 #endif /* C2_AUDIT */
297 		return;
298 	}
299 #ifdef C2_AUDIT
300 	if (audit_active)
301 		audit_enterprom(1);
302 #endif /* C2_AUDIT */
303 	debug_enter(msg);
304 #ifdef C2_AUDIT
305 	if (audit_active)
306 		audit_exitprom(1);
307 #endif /* C2_AUDIT */
308 }
309 
310 /*
311  * Enter debugger.  Called when the user types ctrl-alt-d or whenever
312  * code wants to enter the debugger and possibly resume later.
313  */
314 void
315 debug_enter(
316 	char	*msg)		/* message to print, possibly NULL */
317 {
318 	if (dtrace_debugger_init != NULL)
319 		(*dtrace_debugger_init)();
320 
321 	if (msg)
322 		prom_printf("%s\n", msg);
323 
324 	if (boothowto & RB_DEBUG)
325 		kdi_dvec_enter();
326 
327 	if (dtrace_debugger_fini != NULL)
328 		(*dtrace_debugger_fini)();
329 }
330 
331 void
332 reset(void)
333 {
334 	ushort_t *bios_memchk;
335 
336 	/*
337 	 * Can't use psm_map_phys before the hat is initialized.
338 	 */
339 	if (khat_running) {
340 		bios_memchk = (ushort_t *)psm_map_phys(0x472,
341 		    sizeof (ushort_t), PROT_READ | PROT_WRITE);
342 		if (bios_memchk)
343 			*bios_memchk = 0x1234;	/* bios memory check disable */
344 	}
345 
346 	if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab"))
347 		efi_reset();
348 	pc_reset();
349 	/*NOTREACHED*/
350 }
351 
352 /*
353  * Halt the machine and return to the monitor
354  */
355 void
356 halt(char *s)
357 {
358 	stop_other_cpus();	/* send stop signal to other CPUs */
359 	if (s)
360 		prom_printf("(%s) \n", s);
361 	prom_exit_to_mon();
362 	/*NOTREACHED*/
363 }
364 
365 /*
366  * Enter monitor.  Called via cross-call from stop_other_cpus().
367  */
368 void
369 mp_halt(char *msg)
370 {
371 	if (msg)
372 		prom_printf("%s\n", msg);
373 
374 	/*CONSTANTCONDITION*/
375 	while (1)
376 		;
377 }
378 
/*
 * Initiate interrupt redistribution.  Nothing is done here.
 */
void
i_ddi_intr_redist_all_cpus()
{
	/* intentionally empty */
}
386 
387 /*
388  * XXX These probably ought to live somewhere else
389  * XXX They are called from mem.c
390  */
391 
392 /*
393  * Convert page frame number to an OBMEM page frame number
394  * (i.e. put in the type bits -- zero for this implementation)
395  */
396 pfn_t
397 impl_obmem_pfnum(pfn_t pf)
398 {
399 	return (pf);
400 }
401 
#ifdef	NM_DEBUG
int nmi_test = 0;	/* checked in intentry.s during clock int */
int nmtest = -1;

/*
 * NM_DEBUG-only NMI test routine.
 *
 * Prints its arguments, advances nmtest by 50 and compares arg against
 * the new value.
 *
 * arg	- value compared with nmtest
 * rp	- register state; r_pc is printed on a match
 *
 * Returns 1 when arg equals the updated nmtest, 0 otherwise.
 */
int
nmfunc1(int arg, struct regs *rp)
{
	/*
	 * Pointers are printed with %p and r_pc as an unsigned long, so
	 * the format matches the argument width on both ILP32 and LP64.
	 */
	printf("nmi called with arg = %x, regs = %p\n", arg, (void *)rp);
	nmtest += 50;
	if (arg == nmtest) {
		printf("ip = %lx\n", (ulong_t)rp->r_pc);
		return (1);
	}
	return (0);
}

#endif	/* NM_DEBUG */
419 
420 #include <sys/bootsvcs.h>
421 
422 /* Hacked up initialization for initial kernel check out is HERE. */
423 /* The basic steps are: */
424 /*	kernel bootfuncs definition/initialization for KADB */
425 /*	kadb bootfuncs pointer initialization */
426 /*	putchar/getchar (interrupts disabled) */
427 
428 /* kadb bootfuncs pointer initialization */
429 
430 int
431 sysp_getchar()
432 {
433 	int i;
434 	int s;
435 
436 	if (cons_polledio == NULL) {
437 		/* Uh oh */
438 		prom_printf("getchar called with no console\n");
439 		for (;;)
440 			/* LOOP FOREVER */;
441 	}
442 
443 	s = clear_int_flag();
444 	i = cons_polledio->cons_polledio_getchar(
445 		cons_polledio->cons_polledio_argument);
446 	restore_int_flag(s);
447 	return (i);
448 }
449 
450 void
451 sysp_putchar(int c)
452 {
453 	int s;
454 
455 	/*
456 	 * We have no alternative but to drop the output on the floor.
457 	 */
458 	if (cons_polledio == NULL ||
459 	    cons_polledio->cons_polledio_putchar == NULL)
460 		return;
461 
462 	s = clear_int_flag();
463 	cons_polledio->cons_polledio_putchar(
464 		cons_polledio->cons_polledio_argument, c);
465 	restore_int_flag(s);
466 }
467 
468 int
469 sysp_ischar()
470 {
471 	int i;
472 	int s;
473 
474 	if (cons_polledio == NULL ||
475 	    cons_polledio->cons_polledio_ischar == NULL)
476 		return (0);
477 
478 	s = clear_int_flag();
479 	i = cons_polledio->cons_polledio_ischar(
480 		cons_polledio->cons_polledio_argument);
481 	restore_int_flag(s);
482 	return (i);
483 }
484 
/*
 * Prompt on the console, wait for one character (which is discarded),
 * finish the line and return 1.
 */
int
goany(void)
{
	prom_printf("Type any key to continue ");
	(void) prom_getchar();
	prom_printf("\n");

	return (1);
}
493 
494 static struct boot_syscalls kern_sysp = {
495 	sysp_getchar,	/*	unchar	(*getchar)();	7  */
496 	sysp_putchar,	/*	int	(*putchar)();	8  */
497 	sysp_ischar,	/*	int	(*ischar)();	9  */
498 };
499 
500 void
501 kadb_uses_kernel()
502 {
503 	/*
504 	 * This routine is now totally misnamed, since it does not in fact
505 	 * control kadb's I/O; it only controls the kernel's prom_* I/O.
506 	 */
507 	sysp = &kern_sysp;
508 }
509 
510 /*
511  *	the interface to the outside world
512  */
513 
514 /*
515  * poll_port -- wait for a register to achieve a
516  *		specific state.  Arguments are a mask of bits we care about,
517  *		and two sub-masks.  To return normally, all the bits in the
518  *		first sub-mask must be ON, all the bits in the second sub-
519  *		mask must be OFF.  If about seconds pass without the register
520  *		achieving the desired bit configuration, we return 1, else
521  *		0.
522  */
523 int
524 poll_port(ushort_t port, ushort_t mask, ushort_t onbits, ushort_t offbits)
525 {
526 	int i;
527 	ushort_t maskval;
528 
529 	for (i = 500000; i; i--) {
530 		maskval = inb(port) & mask;
531 		if (((maskval & onbits) == onbits) &&
532 			((maskval & offbits) == 0))
533 			return (0);
534 		drv_usecwait(10);
535 	}
536 	return (1);
537 }
538 
539 /*
540  * set_idle_cpu is called from idle() when a CPU becomes idle.
541  */
542 /*LINTED: static unused */
543 static uint_t last_idle_cpu;
544 
545 /*ARGSUSED*/
546 void
547 set_idle_cpu(int cpun)
548 {
549 	last_idle_cpu = cpun;
550 	(*psm_set_idle_cpuf)(cpun);
551 }
552 
/*
 * unset_idle_cpu is called from idle() when a CPU is no longer idle.  The
 * CPU number is passed straight to the PSM hook.
 */
/*ARGSUSED*/
void
unset_idle_cpu(int cpun)
{
	psm_unset_idle_cpuf(cpun);
}
562 
563 /*
564  * This routine is almost correct now, but not quite.  It still needs the
565  * equivalent concept of "hres_last_tick", just like on the sparc side.
566  * The idea is to take a snapshot of the hi-res timer while doing the
567  * hrestime_adj updates under hres_lock in locore, so that the small
568  * interval between interrupt assertion and interrupt processing is
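 * accounted for correctly.  Once we have this, the code below should
 * be modified to subtract off hres_last_tick rather than hrtime_base.
 *
 * NOTE(review): the code below already subtracts hres_last_tick from
 * gethrtime(), so the paragraph above appears to predate that change --
 * confirm before relying on it.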
571  *
572  * I'd have done this myself, but I don't have source to all of the
573  * vendor-specific hi-res timer routines (grrr...).  The generic hook I
574  * need is something like "gethrtime_unlocked()", which would be just like
575  * gethrtime() but would assume that you're already holding CLOCK_LOCK().
576  * This is what the GET_HRTIME() macro is for on sparc (although it also
577  * serves the function of making time available without a function call
578  * so you don't take a register window overflow while traps are disabled).
579  */
580 void
581 pc_gethrestime(timestruc_t *tp)
582 {
583 	int lock_prev;
584 	timestruc_t now;
585 	int nslt;		/* nsec since last tick */
586 	int adj;		/* amount of adjustment to apply */
587 
588 loop:
589 	lock_prev = hres_lock;
590 	now = hrestime;
591 	nslt = (int)(gethrtime() - hres_last_tick);
592 	if (nslt < 0) {
593 		/*
594 		 * nslt < 0 means a tick came between sampling
595 		 * gethrtime() and hres_last_tick; restart the loop
596 		 */
597 
598 		goto loop;
599 	}
600 	now.tv_nsec += nslt;
601 	if (hrestime_adj != 0) {
602 		if (hrestime_adj > 0) {
603 			adj = (nslt >> ADJ_SHIFT);
604 			if (adj > hrestime_adj)
605 				adj = (int)hrestime_adj;
606 		} else {
607 			adj = -(nslt >> ADJ_SHIFT);
608 			if (adj < hrestime_adj)
609 				adj = (int)hrestime_adj;
610 		}
611 		now.tv_nsec += adj;
612 	}
613 	while ((unsigned long)now.tv_nsec >= NANOSEC) {
614 
615 		/*
616 		 * We might have a large adjustment or have been in the
617 		 * debugger for a long time; take care of (at most) four
618 		 * of those missed seconds (tv_nsec is 32 bits, so
619 		 * anything >4s will be wrapping around).  However,
620 		 * anything more than 2 seconds out of sync will trigger
621 		 * timedelta from clock() to go correct the time anyway,
622 		 * so do what we can, and let the big crowbar do the
623 		 * rest.  A similar correction while loop exists inside
624 		 * hres_tick(); in all cases we'd like tv_nsec to
625 		 * satisfy 0 <= tv_nsec < NANOSEC to avoid confusing
626 		 * user processes, but if tv_sec's a little behind for a
627 		 * little while, that's OK; time still monotonically
628 		 * increases.
629 		 */
630 
631 		now.tv_nsec -= NANOSEC;
632 		now.tv_sec++;
633 	}
634 	if ((hres_lock & ~1) != lock_prev)
635 		goto loop;
636 
637 	*tp = now;
638 }
639 
640 void
641 gethrestime_lasttick(timespec_t *tp)
642 {
643 	int s;
644 
645 	s = hr_clock_lock();
646 	*tp = hrestime;
647 	hr_clock_unlock(s);
648 }
649 
650 time_t
651 gethrestime_sec(void)
652 {
653 	timestruc_t now;
654 
655 	gethrestime(&now);
656 	return (now.tv_sec);
657 }
658 
659 /*
660  * Initialize a kernel thread's stack
661  */
662 
663 caddr_t
664 thread_stk_init(caddr_t stk)
665 {
666 	ASSERT(((uintptr_t)stk & (STACK_ALIGN - 1)) == 0);
667 	return (stk - SA(MINFRAME));
668 }
669 
670 /*
671  * Initialize lwp's kernel stack.
672  */
673 
674 #ifdef TRAPTRACE
675 /*
676  * There's a tricky interdependency here between use of sysenter and
677  * TRAPTRACE which needs recording to avoid future confusion (this is
678  * about the third time I've re-figured this out ..)
679  *
680  * Here's how debugging lcall works with TRAPTRACE.
681  *
682  * 1 We're in userland with a breakpoint on the lcall instruction.
683  * 2 We execute the instruction - the instruction pushes the userland
684  *   %ss, %esp, %efl, %cs, %eip on the stack and zips into the kernel
685  *   via the call gate.
686  * 3 The hardware raises a debug trap in kernel mode, the hardware
687  *   pushes %efl, %cs, %eip and gets to dbgtrap via the idt.
688  * 4 dbgtrap pushes the error code and trapno and calls cmntrap
689  * 5 cmntrap finishes building a trap frame
690  * 6 The TRACE_REGS macros in cmntrap copy a REGSIZE worth chunk
691  *   off the stack into the traptrace buffer.
692  *
693  * This means that the traptrace buffer contains the wrong values in
694  * %esp and %ss, but everything else in there is correct.
695  *
696  * Here's how debugging sysenter works with TRAPTRACE.
697  *
698  * a We're in userland with a breakpoint on the sysenter instruction.
699  * b We execute the instruction - the instruction pushes -nothing-
700  *   on the stack, but sets %cs, %eip, %ss, %esp to prearranged
701  *   values to take us to sys_sysenter, at the top of the lwp's
702  *   stack.
703  * c goto 3
704  *
705  * At this point, because we got into the kernel without the requisite
706  * five pushes on the stack, if we didn't make extra room, we'd
707  * end up with the TRACE_REGS macro fetching the saved %ss and %esp
708  * values from negative (unmapped) stack addresses -- which really bites.
 * That's why we reserve two extra greg_t slots (the phony %ss:%sp) below;
 * this is the '-= 8' when greg_t is 4 bytes wide.
710  *
711  * XXX	Note that reading "up" lwp0's stack works because t0 is declared
712  *	right next to t0stack in locore.s
713  */
714 #endif
715 
716 caddr_t
717 lwp_stk_init(klwp_t *lwp, caddr_t stk)
718 {
719 	caddr_t oldstk;
720 	struct pcb *pcb = &lwp->lwp_pcb;
721 
722 	oldstk = stk;
723 	stk -= SA(sizeof (struct regs) + SA(MINFRAME));
724 #ifdef TRAPTRACE
725 	stk -= 2 * sizeof (greg_t); /* space for phony %ss:%sp (see above) */
726 #endif
727 	stk = (caddr_t)((uintptr_t)stk & ~(STACK_ALIGN - 1ul));
728 	bzero(stk, oldstk - stk);
729 	lwp->lwp_regs = (void *)(stk + SA(MINFRAME));
730 
731 	/*
732 	 * Arrange that the virtualized %fs and %gs GDT descriptors
733 	 * have a well-defined initial state (present, ring 3
734 	 * and of type data).
735 	 */
736 #if defined(__amd64)
737 	if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE)
738 		pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc;
739 	else
740 		pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_u32desc;
741 #elif defined(__i386)
742 	pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc;
743 #endif	/* __i386 */
744 	lwp_installctx(lwp);
745 	return (stk);
746 }
747 
748 /*ARGSUSED*/
749 void
750 lwp_stk_fini(klwp_t *lwp)
751 {}
752 
753 /*
754  * If we're not the panic CPU, we wait in panic_idle for reboot.
755  */
756 static void
757 panic_idle(void)
758 {
759 	splx(ipltospl(CLOCK_LEVEL));
760 	(void) setjmp(&curthread->t_pcb);
761 
762 	for (;;);
763 }
764 
765 /*
766  * Stop the other CPUs by cross-calling them and forcing them to enter
767  * the panic_idle() loop above.
768  */
769 /*ARGSUSED*/
770 void
771 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
772 {
773 	processorid_t i;
774 	cpuset_t xcset;
775 
776 	(void) splzs();
777 
778 	CPUSET_ALL_BUT(xcset, cp->cpu_id);
779 	xc_trycall(NULL, NULL, NULL, xcset, (int (*)())panic_idle);
780 
781 	for (i = 0; i < NCPU; i++) {
782 		if (i != cp->cpu_id && cpu[i] != NULL &&
783 		    (cpu[i]->cpu_flags & CPU_EXISTS))
784 			cpu[i]->cpu_flags |= CPU_QUIESCED;
785 	}
786 }
787 
/*
 * Platform callback following each entry to panicsys().  There is
 * nothing to do on this platform.
 */
/*ARGSUSED*/
void
panic_enter_hw(int spl)
{
}
797 
798 /*
799  * Platform-specific code to execute after panicstr is set: we invoke
800  * the PSM entry point to indicate that a panic has occurred.
801  */
802 /*ARGSUSED*/
803 void
804 panic_quiesce_hw(panic_data_t *pdp)
805 {
806 	psm_notifyf(PSM_PANIC_ENTER);
807 
808 #ifdef	TRAPTRACE
809 	/*
810 	 * Turn off TRAPTRACE
811 	 */
812 	TRAPTRACE_FREEZE;
813 #endif	/* TRAPTRACE */
814 }
815 
/*
 * Platform callback prior to writing crash dump.  There is nothing to do
 * on this platform.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
}
825 
826 /*ARGSUSED*/
827 void
828 plat_tod_fault(enum tod_fault_type tod_bad)
829 {
830 }
831 
832 /*ARGSUSED*/
833 int
834 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
835 {
836 	return (ENOTSUP);
837 }
838 
/*
 * The underlying console output routines are protected by raising IPL in
 * case we are still calling into the early boot services.  Once we start
 * calling the kernel console emulator, it will disable interrupts
 * completely during character rendering (see sysp_putchar, for example).
 * Refer to the comments and code in common/os/console.c for more
 * information on these callbacks.
 *
 * console_enter() raises the priority with splzs() and returns the value
 * splzs() hands back; the busy argument is ignored.
 */
/*ARGSUSED*/
int
console_enter(int busy)
{
	int prev_spl = splzs();

	return (prev_spl);
}
852 
/*
 * Restore the priority level given in spl, normally the value returned
 * by console_enter().  The busy argument is ignored.
 */
/*ARGSUSED*/
void
console_exit(int busy, int spl)
{
	splx(spl);
}
859 
/*
 * Allocate a region of virtual address space, unmapped.
 * Stubbed out except on sparc, at least for now: size is ignored and addr
 * is handed straight back.
 */
/*ARGSUSED*/
void *
boot_virt_alloc(void *addr, size_t size)
{
	void *region = addr;

	return (region);
}
870 
871 volatile unsigned long	tenmicrodata;
872 
873 void
874 tenmicrosec(void)
875 {
876 	extern int	tsc_gethrtime_initted;
877 	int		i;
878 
879 	if (tsc_gethrtime_initted) {
880 		hrtime_t start, end;
881 		start = end =  gethrtime();
882 		while ((end - start) < (10 * (NANOSEC / MICROSEC))) {
883 			SMT_PAUSE();
884 			end = gethrtime();
885 		}
886 	} else {
887 		/*
888 		 * Artificial loop to induce delay.
889 		 */
890 		for (i = 0; i < microdata; i++)
891 			tenmicrodata = microdata;
892 	}
893 }
894 
895 /*
896  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
897  * long, and it fills in the array with the time spent on cpu in
898  * each of the mstates, where time is returned in nsec.
899  *
900  * No guarantee is made that the returned values in times[] will
901  * monotonically increase on sequential calls, although this will
902  * be true in the long run. Any such guarantee must be handled by
903  * the caller, if needed. This can happen if we fail to account
904  * for elapsed time due to a generation counter conflict, yet we
905  * did account for it on a prior call (see below).
906  *
907  * The complication is that the cpu in question may be updating
908  * its microstate at the same time that we are reading it.
909  * Because the microstate is only updated when the CPU's state
910  * changes, the values in cpu_intracct[] can be indefinitely out
911  * of date. To determine true current values, it is necessary to
912  * compare the current time with cpu_mstate_start, and add the
913  * difference to times[cpu_mstate].
914  *
915  * This can be a problem if those values are changing out from
916  * under us. Because the code path in new_cpu_mstate() is
917  * performance critical, we have not added a lock to it. Instead,
918  * we have added a generation counter. Before beginning
919  * modifications, the counter is set to 0. After modifications,
920  * it is set to the old value plus one.
921  *
922  * get_cpu_mstate() will not consider the values of cpu_mstate
923  * and cpu_mstate_start to be usable unless the value of
924  * cpu_mstate_gen is both non-zero and unchanged, both before and
925  * after reading the mstate information. Note that we must
926  * protect against out-of-order loads around accesses to the
927  * generation counter. Also, this is a best effort approach in
928  * that we do not retry should the counter be found to have
929  * changed.
930  *
931  * cpu_intracct[] is used to identify time spent in each CPU
932  * mstate while handling interrupts. Such time should be reported
933  * against system time, and so is subtracted out from its
934  * corresponding cpu_acct[] time and added to
935  * cpu_acct[CMS_SYSTEM].
936  */
937 
938 void
939 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
940 {
941 	int i;
942 	hrtime_t now, start;
943 	uint16_t gen;
944 	uint16_t state;
945 	hrtime_t intracct[NCMSTATES];
946 
947 	/*
948 	 * Load all volatile state under the protection of membar.
949 	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
950 	 * of (now - cpu_mstate_start) by a change in CPU mstate that
951 	 * arrives after we make our last check of cpu_mstate_gen.
952 	 */
953 
954 	now = gethrtime_unscaled();
955 	gen = cpu->cpu_mstate_gen;
956 
957 	membar_consumer();	/* guarantee load ordering */
958 	start = cpu->cpu_mstate_start;
959 	state = cpu->cpu_mstate;
960 	for (i = 0; i < NCMSTATES; i++) {
961 		intracct[i] = cpu->cpu_intracct[i];
962 		times[i] = cpu->cpu_acct[i];
963 	}
964 	membar_consumer();	/* guarantee load ordering */
965 
966 	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
967 		times[state] += now - start;
968 
969 	for (i = 0; i < NCMSTATES; i++) {
970 		if (i == CMS_SYSTEM)
971 			continue;
972 		times[i] -= intracct[i];
973 		if (times[i] < 0) {
974 			intracct[i] += times[i];
975 			times[i] = 0;
976 		}
977 		times[CMS_SYSTEM] += intracct[i];
978 		scalehrtime(&times[i]);
979 	}
980 	scalehrtime(&times[CMS_SYSTEM]);
981 }
982