xref: /freebsd/sys/kern/subr_trap.c (revision 0ea3482342b4d7d6e71f3007ce4dafe445c639fd)
1 /*-
2  * Copyright (C) 1994, David Greenman
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the University of Utah, and William Jolitz.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
38  *	$Id: trap.c,v 1.61 1995/10/09 04:36:01 bde Exp $
39  */
40 
41 /*
42  * 386 Trap and System call handling
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/user.h>
49 #include <sys/acct.h>
50 #include <sys/kernel.h>
51 #include <sys/syscall.h>
52 #include <sys/sysent.h>
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56 
57 #include <vm/vm_param.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_page.h>
62 
63 #include <machine/cpu.h>
64 #include <machine/md_var.h>
65 #include <machine/psl.h>
66 #include <machine/reg.h>
67 #include <machine/trap.h>
68 #include <machine/../isa/isa_device.h>
69 
70 #ifdef POWERFAIL_NMI
71 # include <syslog.h>
72 # include <machine/clock.h>
73 #endif
74 
75 #include "isa.h"
76 #include "npx.h"
77 
78 extern void trap __P((struct trapframe frame));
79 extern int trapwrite __P((unsigned addr));
80 extern void syscall __P((struct trapframe frame));
81 extern void linux_syscall __P((struct trapframe frame));
82 
83 int	trap_pfault	__P((struct trapframe *, int));
84 void	trap_fatal	__P((struct trapframe *));
85 
86 extern inthand_t IDTVEC(syscall);
87 
88 #define MAX_TRAP_MSG		27
89 char *trap_msg[] = {
90 	"",					/*  0 unused */
91 	"privileged instruction fault",		/*  1 T_PRIVINFLT */
92 	"",					/*  2 unused */
93 	"breakpoint instruction fault",		/*  3 T_BPTFLT */
94 	"",					/*  4 unused */
95 	"",					/*  5 unused */
96 	"arithmetic trap",			/*  6 T_ARITHTRAP */
97 	"system forced exception",		/*  7 T_ASTFLT */
98 	"",					/*  8 unused */
99 	"general protection fault",		/*  9 T_PROTFLT */
100 	"trace trap",				/* 10 T_TRCTRAP */
101 	"",					/* 11 unused */
102 	"page fault",				/* 12 T_PAGEFLT */
103 	"",					/* 13 unused */
104 	"alignment fault",			/* 14 T_ALIGNFLT */
105 	"",					/* 15 unused */
106 	"",					/* 16 unused */
107 	"",					/* 17 unused */
108 	"integer divide fault",			/* 18 T_DIVIDE */
109 	"non-maskable interrupt trap",		/* 19 T_NMI */
110 	"overflow trap",			/* 20 T_OFLOW */
111 	"FPU bounds check fault",		/* 21 T_BOUND */
112 	"FPU device not available",		/* 22 T_DNA */
113 	"double fault",				/* 23 T_DOUBLEFLT */
114 	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
115 	"invalid TSS fault",			/* 25 T_TSSFLT */
116 	"segment not present fault",		/* 26 T_SEGNPFLT */
117 	"stack fault",				/* 27 T_STKFLT */
118 };
119 
120 static void userret __P((struct proc *p, struct trapframe *frame,
121 			 u_quad_t oticks));
122 
/*
 * userret() - common return-to-user-mode path shared by trap() and
 * the syscall handlers.  Delivers pending signals, honours a pending
 * reschedule request, charges profiling time accumulated in the
 * kernel, and republishes the process priority.
 *
 * p      - current process
 * frame  - trapframe about to be restored to user mode; only the saved
 *	    user %eip is consulted here (for profiling attribution)
 * oticks - p->p_sticks sampled at kernel entry; the delta against the
 *	    current p_sticks is the system time to charge
 */
static inline void
userret(p, frame, oticks)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
{
	int sig, s;

	/* Deliver every pending signal before returning to user mode. */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but before we
		 * mi_switch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;	/* involuntary context switch */
		mi_switch();
		splx(s);
		/* Signals may have arrived while we were switched out. */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL) {
		u_quad_t ticks = p->p_sticks - oticks;

		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame->tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame->tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	/* Let the rest of the kernel see our (possibly new) priority. */
	curpriority = p->p_priority;
}
169 
170 /*
171  * Exception, fault, and trap interface to the FreeBSD kernel.
172  * This common code is called from assembly language IDT gate entry
173  * routines that prepare a suitable stack frame, and restore this
174  * frame after the exception has been processed.
175  */
176 
/*
 * trap() - central handler for processor exceptions and faults.
 * Called from the assembly IDT stubs with a trapframe passed by value;
 * modifications to `frame' land in the stack copy that is restored to
 * the interrupted context on return.
 *
 * User-mode traps normally translate into a signal (i/ucode) posted
 * via trapsignal().  Kernel-mode traps are patched up only in a few
 * recoverable cases (page fault, doreti fixups, stale PSL_NT, syscall
 * single-step); anything else falls through to trap_fatal().
 */
void
trap(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	u_quad_t sticks = 0;		/* system ticks at entry (profiling) */
	int i = 0, ucode = 0, type, code;	/* i: signal to post; ucode: its code */
#ifdef DEBUG
	u_long eva;
#endif

	type = frame.tf_trapno;
	code = frame.tf_err;

	if (ISPL(frame.tf_cs) == SEL_UPL) {
		/* user trap */

		sticks = p->p_sticks;
		/* Expose the register frame for debuggers/signal delivery. */
		p->p_md.md_regs = (int *)&frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = type;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			/* Clear the trace flag so we don't re-trap at once. */
			frame.tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			astoff();
			cnt.v_soft++;
			/* Deferred profiling tick owed from an earlier AST. */
			if (p->p_flag & P_OWEUPC) {
				addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
				p->p_flag &= ~P_OWEUPC;
			}
			goto out;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
		case T_STKFLT:		/* stack fault */
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			ucode = code + BUS_SEGM_FAULT ;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			/*
			 * trap_pfault(): -1 = fatal (already reported),
			 * 0 = resolved, otherwise the signal to post.
			 */
			i = trap_pfault(&frame, TRUE);
			if (i == -1)
				return;
			if (i == 0)
				goto out;

			ucode = T_PAGEFLT;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV_TRAP;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
			/* Shared power-fail handling in the kernel switch below. */
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			panic("NMI indicates hardware failure");
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF_TRAP;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_SUBRNG_TRAP;
			i = SIGFPE;
			break;

		case T_DNA:
#if NNPX > 0
			/* if a transparent fault (due to context switch "late") */
			if (npxdna())
				return;
#endif	/* NNPX > 0 */

#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE)
			i = math_emulate(&frame);
			if (i == 0) {
				/* Emulation succeeded; honour single-stepping. */
				if (!(frame.tf_eflags & PSL_T))
					return;
				frame.tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
#else	/* MATH_EMULATE || GPL_MATH_EMULATE */
			/* No FPU and no emulator: the process cannot proceed. */
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
#endif	/* MATH_EMULATE || GPL_MATH_EMULATE */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = T_FPOPFLT;
			i = SIGILL;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(&frame, FALSE);
			return;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame.tf_eip == (int)where) {			\
			frame.tf_eip = (int)whereto;			\
			return;						\
		}							\
	} while (0)

			if (intr_nesting_level == 0) {
				/* Redirect known faulting doreti return points. */
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
			}
			/* copyin/copyout style recovery via pcb_onfault. */
			if (curpcb && curpcb->pcb_onfault) {
				frame.tf_eip = (int)curpcb->pcb_onfault;
				return;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame.tf_eflags & PSL_NT) {
				frame.tf_eflags &= ~PSL_NT;
				return;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame.tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				return;
			}
			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame.tf_eflags &= ~PSL_T;
				return;
			}
			/*
			 * Fall through.
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef DDB
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
	/* Entered from both the user and kernel T_NMI cases. */
	handle_powerfail:
		{
		  static unsigned lastalert = 0;

		  /* Rate-limit the warning/beep to once every 10 seconds. */
		  if(time.tv_sec - lastalert > 10)
		    {
		      log(LOG_WARNING, "NMI: power fail\n");
		      sysbeep(TIMER_FREQ/880, hz);
		      lastalert = time.tv_sec;
		    }
		  return;
		}
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}

		/* Unrecoverable kernel trap: dump state and panic. */
		trap_fatal(&frame);
		return;
	}

	/* Post the signal chosen above to the faulting process. */
	trapsignal(p, i, ucode);

#ifdef DEBUG
	/* Diagnostic: report the exception (and fault VA) to the user. */
	eva = rcr2();
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%x", eva);
		uprintf("\n");
	}
#endif

out:
	userret(p, &frame, sticks);
}
444 
445 #ifdef notyet
446 /*
447  * This version doesn't allow a page fault to user space while
448  * in the kernel. The rest of the kernel needs to be made "safe"
449  * before this can be used. I think the only things remaining
450  * to be made safe are the iBCS2 code and the process tracing/
451  * debugging code.
452  */
/*
 * trap_pfault() - stricter page-fault resolver (compiled out under
 * `notyet' above): unlike the live version below it refuses kernel-mode
 * faults on user addresses unless a pcb_onfault recovery handler is
 * set.  Returns -1 (fatal, already reported), 0 (resolved), or the
 * signal number to post.
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	/* PGEX_W set means the access was a write. */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	/* %cr2 holds the faulting linear address. */
	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va < VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t v;
		vm_page_t ptepg;

		/*
		 * Kernel-mode faults on user addresses are fatal here
		 * unless a pcb_onfault handler is registered.
		 */
		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		    (curpcb == NULL || curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 *	fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		/* Hold the page table page across the data-page fault. */
		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel fault: recover via pcb_onfault or die. */
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
564 #endif
565 
/*
 * trap_pfault() - resolve a page fault through the VM system.
 * Selects kernel_map for addresses at or above KERNBASE and the
 * process map otherwise, grows the user stack when the fault lands in
 * the stack region, pre-faults the page-table page if needed, then
 * faults the data page itself.
 *
 * Returns -1 (fatal, trap_fatal() already called), 0 (resolved or
 * recovered via pcb_onfault), or the signal number to deliver
 * (SIGBUS for protection failures, SIGSEGV otherwise).
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	/* %cr2 holds the faulting linear address. */
	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* PGEX_W set means the access was a write. */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		vm_offset_t v;

		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 *	fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		/* Hold the page table page across the data-page fault. */
		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel fault: recover via pcb_onfault or die. */
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
675 
/*
 * trap_fatal() - report an unrecoverable trap: print the trap type,
 * faulting address (for page faults), code-segment descriptor, saved
 * registers/flags, current process and interrupt mask, give an
 * attached kernel debugger a chance to take over, then panic.
 * Does not return unless a debugger claims the trap.
 */
void
trap_fatal(frame)
	struct trapframe *frame;
{
	int code, type, eva;
	struct soft_segment_descriptor softseg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	eva = rcr2();	/* faulting linear address (page faults) */
	/* Decode the faulting code-segment descriptor from the GDT. */
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		printf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
	if (type == T_PAGEFLT) {
		printf("fault virtual address	= 0x%x\n", eva);
		printf("fault code		= %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer	= 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip);
	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
	    softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
	    softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran);
	printf("processor eflags	= ");
	if (frame->tf_eflags & PSL_T)
		printf("trace/trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process		= ");
	if (curproc) {
		printf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	/* Show which interrupt classes are currently blocked by cpl. */
	printf("interrupt mask		= ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if (cpl == 0)
		printf("none");
	printf("\n");

#ifdef KDB
	/* NOTE(review): `psl' is not declared in this function; this
	 * branch looks like it would not compile with KDB defined —
	 * verify before enabling KDB. */
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if (kdb_trap (type, 0, frame))
		return;
#endif
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}
748 
749 /*
750  * Compensate for 386 brain damage (missing URKR).
751  * This is a little simpler than the pagefault handler in trap() because
752  * it the page tables have already been faulted in and high addresses
753  * are thrown out early for other reasons.
754  */
755 int trapwrite(addr)
756 	unsigned addr;
757 {
758 	struct proc *p;
759 	vm_offset_t va, v;
760 	struct vmspace *vm;
761 	int rv;
762 
763 	va = trunc_page((vm_offset_t)addr);
764 	/*
765 	 * XXX - MAX is END.  Changed > to >= for temp. fix.
766 	 */
767 	if (va >= VM_MAXUSER_ADDRESS)
768 		return (1);
769 
770 	p = curproc;
771 	vm = p->p_vmspace;
772 
773 	++p->p_lock;
774 
775 	if ((caddr_t)va >= vm->vm_maxsaddr
776 	    && (caddr_t)va < (caddr_t)USRSTACK) {
777 		if (!grow(p, va)) {
778 			--p->p_lock;
779 			return (1);
780 		}
781 	}
782 
783 	v = trunc_page(vtopte(va));
784 
785 	/*
786 	 * wire the pte page
787 	 */
788 	if (va < USRSTACK) {
789 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
790 	}
791 
792 	/*
793 	 * fault the data page
794 	 */
795 	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
796 
797 	/*
798 	 * unwire the pte page
799 	 */
800 	if (va < USRSTACK) {
801 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
802 	}
803 
804 	--p->p_lock;
805 
806 	if (rv != KERN_SUCCESS)
807 		return 1;
808 
809 	return (0);
810 }
811 
812 /*
813  * System call request from POSIX system call gate interface to kernel.
814  * Like trap(), argument is call by reference.
815  */
/*
 * syscall() - dispatch a system call entered through the lcall gate.
 * Decodes the syscall number from %eax (with SYS_syscall /
 * SYS___syscall indirection), copies the arguments from the user
 * stack, invokes the sysent handler, and stores the result or errno
 * back into the trapframe that will be restored to user mode.
 */
void
syscall(frame)
	struct trapframe frame;
{
	caddr_t params;		/* user stack address of the arguments */
	int i;
	struct sysent *callp;
	struct proc *p = curproc;
	u_quad_t sticks;
	int error;
	int args[8], rval[2];
	u_int code;

	sticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");

	p->p_md.md_regs = (int *)&frame;
	/* Arguments sit just above the return address on the user stack. */
	params = (caddr_t)frame.tf_esp + sizeof(int);
	code = frame.tf_eax;
	/*
	 * Need to check if this is a 32 bit or 64 bit syscall.
	 */
	if (code == SYS_syscall) {
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
	} else if (code == SYS___syscall) {
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		code = fuword(params);
		params += sizeof(quad_t);
	}

 	if (p->p_sysent->sv_mask)
 		code &= p->p_sysent->sv_mask;

	/* Out-of-range codes dispatch to entry 0 (nosys). */
 	if (code >= p->p_sysent->sv_size)
 		callp = &p->p_sysent->sv_table[0];
  	else
 		callp = &p->p_sysent->sv_table[code];

	/* Copy the argument words in from the user stack. */
	if ((i = callp->sy_narg * sizeof(int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto bad;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;

	error = (*callp->sy_call)(p, args, rval);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_edx = rval[1];
		frame.tf_eflags &= ~PSL_C;	/* carry clear = success */
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes.
		 */
		frame.tf_eip -= 7;
		break;

	case EJUSTRETURN:
		/* Handler already set up the frame (e.g. sigreturn). */
		break;

	default:
bad:
		/* Translate the error through the ABI's errno table. */
 		if (p->p_sysent->sv_errsize)
 			if (error >= p->p_sysent->sv_errsize)
  				error = -1;	/* XXX */
   			else
  				error = p->p_sysent->sv_errtbl[error];
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry set = error */
		break;
	}

	if (frame.tf_eflags & PSL_T) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
927 
928 #ifdef COMPAT_LINUX
/*
 * linux_syscall() - dispatch a Linux-ABI system call entered via
 * `int 0x80'.  Unlike the native path, arguments arrive in registers
 * (%ebx..%edi) rather than on the user stack, and errors are returned
 * as negative values in %eax, matching the Linux convention.
 */
void
linux_syscall(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	struct sysent *callp;
	u_quad_t sticks;
	int error;
	int rval[2];
	u_int code;
	/* Linux passes up to five arguments in registers. */
	struct linux_syscall_args {
		int arg1;
		int arg2;
		int arg3;
		int arg4;
		int arg5;
	} args;

	args.arg1 = frame.tf_ebx;
	args.arg2 = frame.tf_ecx;
	args.arg3 = frame.tf_edx;
	args.arg4 = frame.tf_esi;
	args.arg5 = frame.tf_edi;

	sticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("linux syscall");

	p->p_md.md_regs = (int *)&frame;
	code = frame.tf_eax;

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	/* Out-of-range codes dispatch to entry 0 (nosys). */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, (int *)&args);
#endif

	rval[0] = 0;

	error = (*callp->sy_call)(p, &args, rval);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_eflags &= ~PSL_C;
		break;

	case ERESTART:
		/* Reconstruct pc, subtract size of int 0x80 */
		frame.tf_eip -= 2;
		break;

	case EJUSTRETURN:
		/* Handler already set up the frame. */
		break;

	default:
		/* Translate the error through the ABI's errno table. */
 		if (p->p_sysent->sv_errsize)
 			if (error >= p->p_sysent->sv_errsize)
  				error = -1;	/* XXX */
   			else
  				error = p->p_sysent->sv_errtbl[error];
		/* Linux convention: errors are returned as -errno in %eax. */
		frame.tf_eax = -error;
		frame.tf_eflags |= PSL_C;
		break;
	}

	if (frame.tf_eflags & PSL_T) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
1021 #endif /* COMPAT_LINUX */
1022