xref: /freebsd/sys/kern/subr_trap.c (revision ef5d438ed4bc17ad7ece3e40fe4d1f9baf3aadf7)
1 /*-
2  * Copyright (C) 1994, David Greenman
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the University of Utah, and William Jolitz.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
38  *	$Id: trap.c,v 1.71 1996/01/19 03:57:42 dyson Exp $
39  */
40 
41 /*
42  * 386 Trap and System call handling
43  */
44 
45 #include "opt_ktrace.h"
46 #include "opt_ddb.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/acct.h>
52 #include <sys/kernel.h>
53 #include <sys/syscall.h>
54 #include <sys/sysent.h>
55 #include <sys/queue.h>
56 #include <sys/vmmeter.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60 
61 #include <vm/vm.h>
62 #include <vm/vm_param.h>
63 #include <vm/vm_prot.h>
64 #include <vm/lock.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_kern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_extern.h>
70 
71 #include <sys/user.h>
72 
73 #include <machine/cpu.h>
74 #include <machine/md_var.h>
75 #include <machine/psl.h>
76 #include <machine/reg.h>
77 #include <machine/trap.h>
78 #include <machine/../isa/isa_device.h>
79 
80 #ifdef POWERFAIL_NMI
81 # include <syslog.h>
82 # include <machine/clock.h>
83 #endif
84 
85 #include "isa.h"
86 #include "npx.h"
87 
88 int (*pmath_emulate) __P((struct trapframe *));
89 
90 extern void trap __P((struct trapframe frame));
91 extern int trapwrite __P((unsigned addr));
92 extern void syscall __P((struct trapframe frame));
93 extern void linux_syscall __P((struct trapframe frame));
94 
95 static int trap_pfault __P((struct trapframe *, int));
96 static void trap_fatal __P((struct trapframe *));
97 void dblfault_handler __P((void));
98 
99 extern inthand_t IDTVEC(syscall);
100 
/*
 * Human-readable descriptions of the machine traps, indexed by trap
 * number (the T_* constant noted beside each entry).  Trap numbers that
 * are not in use map to the empty string.  The table is read-only.
 */
static const char *const trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

/*
 * Highest trap number that has an entry in trap_msg[].  It is derived
 * from the table itself so the bound cannot drift when entries are added
 * or removed.  The result is cast to int because callers compare it
 * against a signed trap number.
 */
#define MAX_TRAP_MSG	((int)(sizeof(trap_msg) / sizeof(trap_msg[0])) - 1)
132 
133 static void userret __P((struct proc *p, struct trapframe *frame,
134 			 u_quad_t oticks));
135 
136 static inline void
137 userret(p, frame, oticks)
138 	struct proc *p;
139 	struct trapframe *frame;
140 	u_quad_t oticks;
141 {
142 	int sig, s;
143 
144 	while ((sig = CURSIG(p)) != 0)
145 		postsig(sig);
146 	p->p_priority = p->p_usrpri;
147 	if (want_resched) {
148 		/*
149 		 * Since we are curproc, clock will normally just change
150 		 * our priority without moving us from one queue to another
151 		 * (since the running process is not on a queue.)
152 		 * If that happened after we setrunqueue ourselves but before we
153 		 * mi_switch()'ed, we might not be on the queue indicated by
154 		 * our priority.
155 		 */
156 		s = splclock();
157 		setrunqueue(p);
158 		p->p_stats->p_ru.ru_nivcsw++;
159 		mi_switch();
160 		splx(s);
161 		while ((sig = CURSIG(p)) != 0)
162 			postsig(sig);
163 	}
164 	/*
165 	 * Charge system time if profiling.
166 	 */
167 	if (p->p_flag & P_PROFIL) {
168 		u_quad_t ticks = p->p_sticks - oticks;
169 
170 		if (ticks) {
171 #ifdef PROFTIMER
172 			extern int profscale;
173 			addupc(frame->tf_eip, &p->p_stats->p_prof,
174 			    ticks * profscale);
175 #else
176 			addupc(frame->tf_eip, &p->p_stats->p_prof, ticks);
177 #endif
178 		}
179 	}
180 	curpriority = p->p_priority;
181 }
182 
183 /*
184  * Exception, fault, and trap interface to the FreeBSD kernel.
185  * This common code is called from assembly language IDT gate entry
186  * routines that prepare a suitable stack frame, and restore this
187  * frame after the exception has been processed.
188  */
189 
190 void
191 trap(frame)
192 	struct trapframe frame;
193 {
194 	struct proc *p = curproc;
195 	u_quad_t sticks = 0;
196 	int i = 0, ucode = 0, type, code;
197 #ifdef DEBUG
198 	u_long eva;
199 #endif
200 
201 	type = frame.tf_trapno;
202 	code = frame.tf_err;
203 
204 	if (ISPL(frame.tf_cs) == SEL_UPL) {
205 		/* user trap */
206 
207 		sticks = p->p_sticks;
208 		p->p_md.md_regs = (int *)&frame;
209 
210 		switch (type) {
211 		case T_PRIVINFLT:	/* privileged instruction fault */
212 			ucode = type;
213 			i = SIGILL;
214 			break;
215 
216 		case T_BPTFLT:		/* bpt instruction fault */
217 		case T_TRCTRAP:		/* trace trap */
218 			frame.tf_eflags &= ~PSL_T;
219 			i = SIGTRAP;
220 			break;
221 
222 		case T_ARITHTRAP:	/* arithmetic trap */
223 			ucode = code;
224 			i = SIGFPE;
225 			break;
226 
227 		case T_ASTFLT:		/* Allow process switch */
228 			astoff();
229 			cnt.v_soft++;
230 			if (p->p_flag & P_OWEUPC) {
231 				addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
232 				p->p_flag &= ~P_OWEUPC;
233 			}
234 			goto out;
235 
236 		case T_PROTFLT:		/* general protection fault */
237 		case T_SEGNPFLT:	/* segment not present fault */
238 		case T_STKFLT:		/* stack fault */
239 		case T_TSSFLT:		/* invalid TSS fault */
240 		case T_DOUBLEFLT:	/* double fault */
241 		default:
242 			ucode = code + BUS_SEGM_FAULT ;
243 			i = SIGBUS;
244 			break;
245 
246 		case T_PAGEFLT:		/* page fault */
247 			i = trap_pfault(&frame, TRUE);
248 			if (i == -1)
249 				return;
250 			if (i == 0)
251 				goto out;
252 
253 			ucode = T_PAGEFLT;
254 			break;
255 
256 		case T_DIVIDE:		/* integer divide fault */
257 			ucode = FPE_INTDIV_TRAP;
258 			i = SIGFPE;
259 			break;
260 
261 #if NISA > 0
262 		case T_NMI:
263 #ifdef POWERFAIL_NMI
264 			goto handle_powerfail;
265 #else /* !POWERFAIL_NMI */
266 #ifdef DDB
267 			/* NMI can be hooked up to a pushbutton for debugging */
268 			printf ("NMI ... going to debugger\n");
269 			if (kdb_trap (type, 0, &frame))
270 				return;
271 #endif /* DDB */
272 			/* machine/parity/power fail/"kitchen sink" faults */
273 			if (isa_nmi(code) == 0) return;
274 			panic("NMI indicates hardware failure");
275 #endif /* POWERFAIL_NMI */
276 #endif /* NISA > 0 */
277 
278 		case T_OFLOW:		/* integer overflow fault */
279 			ucode = FPE_INTOVF_TRAP;
280 			i = SIGFPE;
281 			break;
282 
283 		case T_BOUND:		/* bounds check fault */
284 			ucode = FPE_SUBRNG_TRAP;
285 			i = SIGFPE;
286 			break;
287 
288 		case T_DNA:
289 #if NNPX > 0
290 			/* if a transparent fault (due to context switch "late") */
291 			if (npxdna())
292 				return;
293 #endif	/* NNPX > 0 */
294 
295 			if (!pmath_emulate) {
296 				i = SIGFPE;
297 				ucode = FPE_FPU_NP_TRAP;
298 				break;
299 			}
300 			i = (*pmath_emulate)(&frame);
301 			if (i == 0) {
302 				if (!(frame.tf_eflags & PSL_T))
303 					return;
304 				frame.tf_eflags &= ~PSL_T;
305 				i = SIGTRAP;
306 			}
307 			/* else ucode = emulator_only_knows() XXX */
308 			break;
309 
310 		case T_FPOPFLT:		/* FPU operand fetch fault */
311 			ucode = T_FPOPFLT;
312 			i = SIGILL;
313 			break;
314 		}
315 	} else {
316 		/* kernel trap */
317 
318 		switch (type) {
319 		case T_PAGEFLT:			/* page fault */
320 			(void) trap_pfault(&frame, FALSE);
321 			return;
322 
323 		case T_PROTFLT:		/* general protection fault */
324 		case T_SEGNPFLT:	/* segment not present fault */
325 			/*
326 			 * Invalid segment selectors and out of bounds
327 			 * %eip's and %esp's can be set up in user mode.
328 			 * This causes a fault in kernel mode when the
329 			 * kernel tries to return to user mode.  We want
330 			 * to get this fault so that we can fix the
331 			 * problem here and not have to check all the
332 			 * selectors and pointers when the user changes
333 			 * them.
334 			 */
335 #define	MAYBE_DORETI_FAULT(where, whereto)				\
336 	do {								\
337 		if (frame.tf_eip == (int)where) {			\
338 			frame.tf_eip = (int)whereto;			\
339 			return;						\
340 		}							\
341 	} while (0)
342 
343 			if (intr_nesting_level == 0) {
344 				MAYBE_DORETI_FAULT(doreti_iret,
345 						   doreti_iret_fault);
346 				MAYBE_DORETI_FAULT(doreti_popl_ds,
347 						   doreti_popl_ds_fault);
348 				MAYBE_DORETI_FAULT(doreti_popl_es,
349 						   doreti_popl_es_fault);
350 			}
351 			if (curpcb && curpcb->pcb_onfault) {
352 				frame.tf_eip = (int)curpcb->pcb_onfault;
353 				return;
354 			}
355 			break;
356 
357 		case T_TSSFLT:
358 			/*
359 			 * PSL_NT can be set in user mode and isn't cleared
360 			 * automatically when the kernel is entered.  This
361 			 * causes a TSS fault when the kernel attempts to
362 			 * `iret' because the TSS link is uninitialized.  We
363 			 * want to get this fault so that we can fix the
364 			 * problem here and not every time the kernel is
365 			 * entered.
366 			 */
367 			if (frame.tf_eflags & PSL_NT) {
368 				frame.tf_eflags &= ~PSL_NT;
369 				return;
370 			}
371 			break;
372 
373 		case T_TRCTRAP:	 /* trace trap */
374 			if (frame.tf_eip == (int)IDTVEC(syscall)) {
375 				/*
376 				 * We've just entered system mode via the
377 				 * syscall lcall.  Continue single stepping
378 				 * silently until the syscall handler has
379 				 * saved the flags.
380 				 */
381 				return;
382 			}
383 			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
384 				/*
385 				 * The syscall handler has now saved the
386 				 * flags.  Stop single stepping it.
387 				 */
388 				frame.tf_eflags &= ~PSL_T;
389 				return;
390 			}
391 			/*
392 			 * Fall through.
393 			 */
394 		case T_BPTFLT:
395 			/*
396 			 * If DDB is enabled, let it handle the debugger trap.
397 			 * Otherwise, debugger traps "can't happen".
398 			 */
399 #ifdef DDB
400 			if (kdb_trap (type, 0, &frame))
401 				return;
402 #endif
403 			break;
404 
405 #if NISA > 0
406 		case T_NMI:
407 #ifdef POWERFAIL_NMI
408 #ifndef TIMER_FREQ
409 #  define TIMER_FREQ 1193182
410 #endif
411 	handle_powerfail:
412 		{
413 		  static unsigned lastalert = 0;
414 
415 		  if(time.tv_sec - lastalert > 10)
416 		    {
417 		      log(LOG_WARNING, "NMI: power fail\n");
418 		      sysbeep(TIMER_FREQ/880, hz);
419 		      lastalert = time.tv_sec;
420 		    }
421 		  return;
422 		}
423 #else /* !POWERFAIL_NMI */
424 #ifdef DDB
425 			/* NMI can be hooked up to a pushbutton for debugging */
426 			printf ("NMI ... going to debugger\n");
427 			if (kdb_trap (type, 0, &frame))
428 				return;
429 #endif /* DDB */
430 			/* machine/parity/power fail/"kitchen sink" faults */
431 			if (isa_nmi(code) == 0) return;
432 			/* FALL THROUGH */
433 #endif /* POWERFAIL_NMI */
434 #endif /* NISA > 0 */
435 		}
436 
437 		trap_fatal(&frame);
438 		return;
439 	}
440 
441 	trapsignal(p, i, ucode);
442 
443 #ifdef DEBUG
444 	eva = rcr2();
445 	if (type <= MAX_TRAP_MSG) {
446 		uprintf("fatal process exception: %s",
447 			trap_msg[type]);
448 		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
449 			uprintf(", fault VA = 0x%x", eva);
450 		uprintf("\n");
451 	}
452 #endif
453 
454 out:
455 	userret(p, &frame, sticks);
456 }
457 
#ifdef notyet
/*
 * Page fault handler variant that refuses kernel-mode faults on user
 * addresses unless a pcb_onfault recovery address is registered.  The
 * rest of the kernel needs to be made "safe" before this can be used;
 * the iBCS2 code and the process tracing/debugging code are thought to
 * be the only things left to convert.
 *
 *	frame		trap frame of the faulting context
 *	usermode	non-zero when the fault was taken from user mode
 *
 * Returns 0 when the fault was resolved (or execution was redirected to
 * pcb_onfault), -1 after trap_fatal() has been called, and otherwise the
 * signal (SIGBUS or SIGSEGV) to post to the process.
 */
static int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	struct proc *p = curproc;
	struct vmspace *vm;
	vm_map_t map;
	vm_offset_t va;
	vm_prot_t ftype;
	int eva;
	int rv = 0;

	/* Faulting address as reported by the CPU, and its page. */
	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	/* A write fault needs write access on top of read access. */
	ftype = VM_PROT_READ;
	if (frame->tf_err & PGEX_W)
		ftype |= VM_PROT_WRITE;

	if (va >= VM_MIN_KERNEL_ADDRESS) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Kernel virtual addresses always have their pte pages
		 * mapped, so only the page itself has to be faulted in.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	} else {
		vm_offset_t v;
		vm_page_t mpte;
		int can_recover;

		/* Is a recovery address registered for this context? */
		can_recover = (curpcb != NULL && curpcb->pcb_onfault != NULL);

		/*
		 * Without a current process, or for a kernel-mode access
		 * to a user address with no recovery address, the fault
		 * is fatal.
		 */
		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS && !can_recover)) {
			trap_fatal(frame);
			return (-1);
		}

		/* A process without an address space cannot be serviced. */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if the address lies in the stack region.
		 */
		if ((caddr_t)va > vm->vm_maxsaddr &&
		    (caddr_t)va < (caddr_t)USRSTACK &&
		    !grow(p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}

		/*
		 * If the page table page covering `va' is not mapped,
		 * fault it in first.
		 */
		v = (vm_offset_t) vtopte(va);
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		/* Hold the page table page while the user page is faulted. */
		mpte = pmap_use_pt(vm_map_pmap(map), va);
		rv = vm_fault(map, va, ftype, FALSE);
		pmap_unuse_pt(vm_map_pmap(map), va, mpte);

		--p->p_lock;
	}

	if (rv == KERN_SUCCESS)
		return (0);

nogo:
	if (usermode) {
		/* kludge to pass faulting virtual address to sendsig */
		frame->tf_err = eva;

		return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
	}

	/* Kernel mode: resume at the recovery address if there is one. */
	if (curpcb && curpcb->pcb_onfault) {
		frame->tf_eip = (int)curpcb->pcb_onfault;
		return (0);
	}
	trap_fatal(frame);
	return (-1);
}
#endif
578 
579 int
580 trap_pfault(frame, usermode)
581 	struct trapframe *frame;
582 	int usermode;
583 {
584 	vm_offset_t va;
585 	struct vmspace *vm = NULL;
586 	vm_map_t map = 0;
587 	int rv = 0;
588 	vm_prot_t ftype;
589 	int eva;
590 	struct proc *p = curproc;
591 
592 	eva = rcr2();
593 	va = trunc_page((vm_offset_t)eva);
594 
595 	if (va >= KERNBASE) {
596 		/*
597 		 * Don't allow user-mode faults in kernel address space.
598 		 */
599 		if (usermode)
600 			goto nogo;
601 
602 		map = kernel_map;
603 	} else {
604 		/*
605 		 * This is a fault on non-kernel virtual memory.
606 		 * vm is initialized above to NULL. If curproc is NULL
607 		 * or curproc->p_vmspace is NULL the fault is fatal.
608 		 */
609 		if (p != NULL)
610 			vm = p->p_vmspace;
611 
612 		if (vm == NULL)
613 			goto nogo;
614 
615 		map = &vm->vm_map;
616 	}
617 
618 	if (frame->tf_err & PGEX_W)
619 		ftype = VM_PROT_READ | VM_PROT_WRITE;
620 	else
621 		ftype = VM_PROT_READ;
622 
623 	if (map != kernel_map) {
624 		vm_offset_t v;
625 		vm_page_t mpte;
626 
627 		/*
628 		 * Keep swapout from messing with us during this
629 		 *	critical time.
630 		 */
631 		++p->p_lock;
632 
633 		/*
634 		 * Grow the stack if necessary
635 		 */
636 		if ((caddr_t)va > vm->vm_maxsaddr
637 		    && (caddr_t)va < (caddr_t)USRSTACK) {
638 			if (!grow(p, va)) {
639 				rv = KERN_FAILURE;
640 				--p->p_lock;
641 				goto nogo;
642 			}
643 		}
644 
645 		/*
646 		 * Check if page table is mapped, if not,
647 		 *	fault it first
648 		 */
649 		v = (vm_offset_t) vtopte(va);
650 
651 		/* Fault the pte only if needed: */
652 		if (*((int *)vtopte(v)) == 0)
653 			(void) vm_fault(map,
654 				trunc_page(v), VM_PROT_WRITE, FALSE);
655 
656 		mpte = pmap_use_pt( vm_map_pmap(map), va);
657 
658 		/* Fault in the user page: */
659 		rv = vm_fault(map, va, ftype, FALSE);
660 
661 		pmap_unuse_pt( vm_map_pmap(map), va, mpte);
662 
663 		--p->p_lock;
664 	} else {
665 		/*
666 		 * Since we know that kernel virtual address addresses
667 		 * always have pte pages mapped, we just have to fault
668 		 * the page.
669 		 */
670 		rv = vm_fault(map, va, ftype, FALSE);
671 	}
672 
673 	if (rv == KERN_SUCCESS)
674 		return (0);
675 nogo:
676 	if (!usermode) {
677 		if (curpcb && curpcb->pcb_onfault) {
678 			frame->tf_eip = (int)curpcb->pcb_onfault;
679 			return (0);
680 		}
681 		trap_fatal(frame);
682 		return (-1);
683 	}
684 
685 	/* kludge to pass faulting virtual address to sendsig */
686 	frame->tf_err = eva;
687 
688 	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
689 }
690 
691 static void
692 trap_fatal(frame)
693 	struct trapframe *frame;
694 {
695 	int code, type, eva;
696 	struct soft_segment_descriptor softseg;
697 
698 	code = frame->tf_err;
699 	type = frame->tf_trapno;
700 	eva = rcr2();
701 	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
702 
703 	if (type <= MAX_TRAP_MSG)
704 		printf("\n\nFatal trap %d: %s while in %s mode\n",
705 			type, trap_msg[type],
706 			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
707 	if (type == T_PAGEFLT) {
708 		printf("fault virtual address	= 0x%x\n", eva);
709 		printf("fault code		= %s %s, %s\n",
710 			code & PGEX_U ? "user" : "supervisor",
711 			code & PGEX_W ? "write" : "read",
712 			code & PGEX_P ? "protection violation" : "page not present");
713 	}
714 	printf("instruction pointer	= 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip);
715 	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
716 	    softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
717 	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
718 	    softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran);
719 	printf("processor eflags	= ");
720 	if (frame->tf_eflags & PSL_T)
721 		printf("trace/trap, ");
722 	if (frame->tf_eflags & PSL_I)
723 		printf("interrupt enabled, ");
724 	if (frame->tf_eflags & PSL_NT)
725 		printf("nested task, ");
726 	if (frame->tf_eflags & PSL_RF)
727 		printf("resume, ");
728 	if (frame->tf_eflags & PSL_VM)
729 		printf("vm86, ");
730 	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
731 	printf("current process		= ");
732 	if (curproc) {
733 		printf("%lu (%s)\n",
734 		    (u_long)curproc->p_pid, curproc->p_comm ?
735 		    curproc->p_comm : "");
736 	} else {
737 		printf("Idle\n");
738 	}
739 	printf("interrupt mask		= ");
740 	if ((cpl & net_imask) == net_imask)
741 		printf("net ");
742 	if ((cpl & tty_imask) == tty_imask)
743 		printf("tty ");
744 	if ((cpl & bio_imask) == bio_imask)
745 		printf("bio ");
746 	if (cpl == 0)
747 		printf("none");
748 	printf("\n");
749 
750 #ifdef KDB
751 	if (kdb_trap(&psl))
752 		return;
753 #endif
754 #ifdef DDB
755 	if (kdb_trap (type, 0, frame))
756 		return;
757 #endif
758 	if (type <= MAX_TRAP_MSG)
759 		panic(trap_msg[type]);
760 	else
761 		panic("unknown/reserved trap");
762 }
763 
764 /*
765  * Double fault handler. Called when a fault occurs while writing
766  * a frame for a trap/exception onto the stack. This usually occurs
767  * when the stack overflows (such is the case with infinite recursion,
768  * for example).
769  *
770  * XXX Note that the current PTD gets replaced by IdlePTD when the
771  * task switch occurs. This means that the stack that was active at
772  * the time of the double fault is not available at <kstack> unless
773  * the machine was idle when the double fault occurred. The downside
774  * of this is that "trace <ebp>" in ddb won't work.
775  */
776 void
777 dblfault_handler()
778 {
779 	struct pcb *pcb = curpcb;
780 
781 	if (pcb != NULL) {
782 		printf("\nFatal double fault:\n");
783 		printf("eip = 0x%x\n", pcb->pcb_tss.tss_eip);
784 		printf("esp = 0x%x\n", pcb->pcb_tss.tss_esp);
785 		printf("ebp = 0x%x\n", pcb->pcb_tss.tss_ebp);
786 	}
787 
788 	panic("double fault");
789 }
790 
791 /*
792  * Compensate for 386 brain damage (missing URKR).
793  * This is a little simpler than the pagefault handler in trap() because
794  * it the page tables have already been faulted in and high addresses
795  * are thrown out early for other reasons.
796  */
797 int trapwrite(addr)
798 	unsigned addr;
799 {
800 	struct proc *p;
801 	vm_offset_t va, v;
802 	struct vmspace *vm;
803 	int rv;
804 
805 	va = trunc_page((vm_offset_t)addr);
806 	/*
807 	 * XXX - MAX is END.  Changed > to >= for temp. fix.
808 	 */
809 	if (va >= VM_MAXUSER_ADDRESS)
810 		return (1);
811 
812 	p = curproc;
813 	vm = p->p_vmspace;
814 
815 	++p->p_lock;
816 
817 	if ((caddr_t)va >= vm->vm_maxsaddr
818 	    && (caddr_t)va < (caddr_t)USRSTACK) {
819 		if (!grow(p, va)) {
820 			--p->p_lock;
821 			return (1);
822 		}
823 	}
824 
825 	v = trunc_page(vtopte(va));
826 
827 	/*
828 	 * wire the pte page
829 	 */
830 	if (va < USRSTACK) {
831 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
832 	}
833 
834 	/*
835 	 * fault the data page
836 	 */
837 	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
838 
839 	/*
840 	 * unwire the pte page
841 	 */
842 	if (va < USRSTACK) {
843 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
844 	}
845 
846 	--p->p_lock;
847 
848 	if (rv != KERN_SUCCESS)
849 		return 1;
850 
851 	return (0);
852 }
853 
854 /*
855  * System call request from POSIX system call gate interface to kernel.
856  * Like trap(), argument is call by reference.
857  */
858 void
859 syscall(frame)
860 	struct trapframe frame;
861 {
862 	caddr_t params;
863 	int i;
864 	struct sysent *callp;
865 	struct proc *p = curproc;
866 	u_quad_t sticks;
867 	int error;
868 	int args[8], rval[2];
869 	u_int code;
870 
871 	sticks = p->p_sticks;
872 	if (ISPL(frame.tf_cs) != SEL_UPL)
873 		panic("syscall");
874 
875 	p->p_md.md_regs = (int *)&frame;
876 	params = (caddr_t)frame.tf_esp + sizeof(int);
877 	code = frame.tf_eax;
878 	/*
879 	 * Need to check if this is a 32 bit or 64 bit syscall.
880 	 */
881 	if (code == SYS_syscall) {
882 		/*
883 		 * Code is first argument, followed by actual args.
884 		 */
885 		code = fuword(params);
886 		params += sizeof(int);
887 	} else if (code == SYS___syscall) {
888 		/*
889 		 * Like syscall, but code is a quad, so as to maintain
890 		 * quad alignment for the rest of the arguments.
891 		 */
892 		code = fuword(params);
893 		params += sizeof(quad_t);
894 	}
895 
896  	if (p->p_sysent->sv_mask)
897  		code &= p->p_sysent->sv_mask;
898 
899  	if (code >= p->p_sysent->sv_size)
900  		callp = &p->p_sysent->sv_table[0];
901   	else
902  		callp = &p->p_sysent->sv_table[code];
903 
904 	if ((i = callp->sy_narg * sizeof(int)) &&
905 	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
906 #ifdef KTRACE
907 		if (KTRPOINT(p, KTR_SYSCALL))
908 			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
909 #endif
910 		goto bad;
911 	}
912 #ifdef KTRACE
913 	if (KTRPOINT(p, KTR_SYSCALL))
914 		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
915 #endif
916 	rval[0] = 0;
917 	rval[1] = frame.tf_edx;
918 
919 	error = (*callp->sy_call)(p, args, rval);
920 
921 	switch (error) {
922 
923 	case 0:
924 		/*
925 		 * Reinitialize proc pointer `p' as it may be different
926 		 * if this is a child returning from fork syscall.
927 		 */
928 		p = curproc;
929 		frame.tf_eax = rval[0];
930 		frame.tf_edx = rval[1];
931 		frame.tf_eflags &= ~PSL_C;
932 		break;
933 
934 	case ERESTART:
935 		/*
936 		 * Reconstruct pc, assuming lcall $X,y is 7 bytes.
937 		 */
938 		frame.tf_eip -= 7;
939 		break;
940 
941 	case EJUSTRETURN:
942 		break;
943 
944 	default:
945 bad:
946  		if (p->p_sysent->sv_errsize)
947  			if (error >= p->p_sysent->sv_errsize)
948   				error = -1;	/* XXX */
949    			else
950   				error = p->p_sysent->sv_errtbl[error];
951 		frame.tf_eax = error;
952 		frame.tf_eflags |= PSL_C;
953 		break;
954 	}
955 
956 	if (frame.tf_eflags & PSL_T) {
957 		/* Traced syscall. */
958 		frame.tf_eflags &= ~PSL_T;
959 		trapsignal(p, SIGTRAP, 0);
960 	}
961 
962 	userret(p, &frame, sticks);
963 
964 #ifdef KTRACE
965 	if (KTRPOINT(p, KTR_SYSRET))
966 		ktrsysret(p->p_tracep, code, error, rval[0]);
967 #endif
968 }
969 
#if defined(COMPAT_LINUX) || defined(LINUX)
/*
 * System call entry for the Linux emulation.
 *
 *	frame	the saved user register state; results are written back
 *		into it (tf_eax, tf_eflags, tf_eip)
 *
 * The system call number is taken from tf_eax and the five arguments
 * from the ebx, ecx, edx, esi and edi registers.  On success tf_eax
 * receives rval[0] and the carry flag is cleared; on failure tf_eax
 * receives the negated error number and the carry flag is set.
 */
void
linux_syscall(frame)
	struct trapframe frame;
{
	struct linux_syscall_args {
		int arg1;
		int arg2;
		int arg3;
		int arg4;
		int arg5;
	} args;
	struct proc *p = curproc;
	struct sysent *entry;
	u_quad_t start_ticks;
	u_int code;
	int rval[2];
	int error;

	start_ticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("linux syscall");

	/* The arguments arrive in registers rather than on the stack. */
	args.arg1 = frame.tf_ebx;
	args.arg2 = frame.tf_ecx;
	args.arg3 = frame.tf_edx;
	args.arg4 = frame.tf_esi;
	args.arg5 = frame.tf_edi;

	p->p_md.md_regs = (int *)&frame;

	code = frame.tf_eax;
	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	/* Out-of-range numbers are routed to entry 0 of the table. */
	if (code < p->p_sysent->sv_size)
		entry = &p->p_sysent->sv_table[code];
	else
		entry = &p->p_sysent->sv_table[0];

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, entry->sy_narg, (int *)&args);
#endif

	rval[0] = 0;

	error = (*entry->sy_call)(p, &args, rval);

	if (error == 0) {
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_eflags &= ~PSL_C;
	} else if (error == ERESTART) {
		/* Reconstruct pc, subtract size of int 0x80 */
		frame.tf_eip -= 2;
	} else if (error != EJUSTRETURN) {
		/*
		 * Translate the error number through the emulation's
		 * table when it provides one, then hand it back negated.
		 */
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame.tf_eax = -error;
		frame.tf_eflags |= PSL_C;
	}
	/* EJUSTRETURN: the frame is left exactly as it is. */

	if (frame.tf_eflags & PSL_T) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, start_ticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
#endif /* COMPAT_LINUX || LINUX */
1064