xref: /freebsd/sys/kern/subr_trap.c (revision 8e6b01171e30297084bb0b4457c4183c2746aacc)
1 /*-
2  * Copyright (C) 1994, David Greenman
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the University of Utah, and William Jolitz.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
38  *	$Id: trap.c,v 1.60 1995/10/04 07:07:44 julian Exp $
39  */
40 
41 /*
42  * 386 Trap and System call handling
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/user.h>
49 #include <sys/acct.h>
50 #include <sys/kernel.h>
51 #include <sys/syscall.h>
52 #include <sys/sysent.h>
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56 
57 #include <vm/vm_param.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_page.h>
62 
63 #include <machine/cpu.h>
64 #include <machine/md_var.h>
65 #include <machine/psl.h>
66 #include <machine/reg.h>
67 #include <machine/trap.h>
68 #include <machine/../isa/isa_device.h>
69 
70 #ifdef POWERFAIL_NMI
71 # include <syslog.h>
72 # include <machine/clock.h>
73 #endif
74 
75 #include "isa.h"
76 #include "npx.h"
77 
78 extern void trap __P((struct trapframe frame));
79 extern int trapwrite __P((unsigned addr));
80 extern void syscall __P((struct trapframe frame));
81 extern void linux_syscall __P((struct trapframe frame));
82 
83 int	trap_pfault	__P((struct trapframe *, int));
84 void	trap_fatal	__P((struct trapframe *));
85 
86 extern inthand_t IDTVEC(syscall);
87 
/*
 * Human-readable descriptions of the T_* trap types, indexed by trap
 * number.  Trap numbers that are not used map to the empty string.
 *
 * MAX_TRAP_MSG is the highest valid index into trap_msg[].  It is derived
 * from the table itself so that adding or removing an entry cannot leave
 * the bound stale.  The result is cast to int so that comparisons against
 * (signed) trap numbers are not silently performed as unsigned.
 */
#define MAX_TRAP_MSG	((int)(sizeof(trap_msg) / sizeof(trap_msg[0])) - 1)
char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};
119 
120 static void userret __P((struct proc *p, struct trapframe *frame,
121 			 u_quad_t oticks));
122 
123 static inline void
124 userret(p, frame, oticks)
125 	struct proc *p;
126 	struct trapframe *frame;
127 	u_quad_t oticks;
128 {
129 	int sig, s;
130 
131 	while ((sig = CURSIG(p)) != 0)
132 		postsig(sig);
133 	p->p_priority = p->p_usrpri;
134 	if (want_resched) {
135 		/*
136 		 * Since we are curproc, clock will normally just change
137 		 * our priority without moving us from one queue to another
138 		 * (since the running process is not on a queue.)
139 		 * If that happened after we setrunqueue ourselves but before we
140 		 * mi_switch()'ed, we might not be on the queue indicated by
141 		 * our priority.
142 		 */
143 		s = splclock();
144 		setrunqueue(p);
145 		p->p_stats->p_ru.ru_nivcsw++;
146 		mi_switch();
147 		splx(s);
148 		while ((sig = CURSIG(p)) != 0)
149 			postsig(sig);
150 	}
151 	/*
152 	 * Charge system time if profiling.
153 	 */
154 	if (p->p_flag & P_PROFIL) {
155 		u_quad_t ticks = p->p_sticks - oticks;
156 
157 		if (ticks) {
158 #ifdef PROFTIMER
159 			extern int profscale;
160 			addupc(frame->tf_eip, &p->p_stats->p_prof,
161 			    ticks * profscale);
162 #else
163 			addupc(frame->tf_eip, &p->p_stats->p_prof, ticks);
164 #endif
165 		}
166 	}
167 	curpriority = p->p_priority;
168 }
169 
170 /*
171  * Exception, fault, and trap interface to the FreeBSD kernel.
172  * This common code is called from assembly language IDT gate entry
173  * routines that prepare a suitable stack frame, and restore this
174  * frame after the exception has been processed.
175  */
176 
177 void
178 trap(frame)
179 	struct trapframe frame;
180 {
181 	struct proc *p = curproc;
182 	u_quad_t sticks = 0;
183 	int i = 0, ucode = 0, type, code;
184 #ifdef DIAGNOSTIC
185 	u_long eva;
186 #endif
187 
188 	type = frame.tf_trapno;
189 	code = frame.tf_err;
190 
191 	if (ISPL(frame.tf_cs) == SEL_UPL) {
192 		/* user trap */
193 
194 		sticks = p->p_sticks;
195 		p->p_md.md_regs = (int *)&frame;
196 
197 		switch (type) {
198 		case T_PRIVINFLT:	/* privileged instruction fault */
199 			ucode = type;
200 			i = SIGILL;
201 			break;
202 
203 		case T_BPTFLT:		/* bpt instruction fault */
204 		case T_TRCTRAP:		/* trace trap */
205 			frame.tf_eflags &= ~PSL_T;
206 			i = SIGTRAP;
207 			break;
208 
209 		case T_ARITHTRAP:	/* arithmetic trap */
210 			ucode = code;
211 			i = SIGFPE;
212 			break;
213 
214 		case T_ASTFLT:		/* Allow process switch */
215 			astoff();
216 			cnt.v_soft++;
217 			if (p->p_flag & P_OWEUPC) {
218 				addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
219 				p->p_flag &= ~P_OWEUPC;
220 			}
221 			goto out;
222 
223 		case T_PROTFLT:		/* general protection fault */
224 		case T_SEGNPFLT:	/* segment not present fault */
225 		case T_STKFLT:		/* stack fault */
226 		case T_TSSFLT:		/* invalid TSS fault */
227 		case T_DOUBLEFLT:	/* double fault */
228 		default:
229 			ucode = code + BUS_SEGM_FAULT ;
230 			i = SIGBUS;
231 			break;
232 
233 		case T_PAGEFLT:		/* page fault */
234 			i = trap_pfault(&frame, TRUE);
235 			if (i == -1)
236 				return;
237 			if (i == 0)
238 				goto out;
239 
240 			ucode = T_PAGEFLT;
241 			break;
242 
243 		case T_DIVIDE:		/* integer divide fault */
244 			ucode = FPE_INTDIV_TRAP;
245 			i = SIGFPE;
246 			break;
247 
248 #if NISA > 0
249 		case T_NMI:
250 #ifdef POWERFAIL_NMI
251 			goto handle_powerfail;
252 #else /* !POWERFAIL_NMI */
253 #ifdef DDB
254 			/* NMI can be hooked up to a pushbutton for debugging */
255 			printf ("NMI ... going to debugger\n");
256 			if (kdb_trap (type, 0, &frame))
257 				return;
258 #endif /* DDB */
259 			/* machine/parity/power fail/"kitchen sink" faults */
260 			if (isa_nmi(code) == 0) return;
261 			panic("NMI indicates hardware failure");
262 #endif /* POWERFAIL_NMI */
263 #endif /* NISA > 0 */
264 
265 		case T_OFLOW:		/* integer overflow fault */
266 			ucode = FPE_INTOVF_TRAP;
267 			i = SIGFPE;
268 			break;
269 
270 		case T_BOUND:		/* bounds check fault */
271 			ucode = FPE_SUBRNG_TRAP;
272 			i = SIGFPE;
273 			break;
274 
275 		case T_DNA:
276 #if NNPX > 0
277 			/* if a transparent fault (due to context switch "late") */
278 			if (npxdna())
279 				return;
280 #endif	/* NNPX > 0 */
281 
282 #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE)
283 			i = math_emulate(&frame);
284 			if (i == 0) {
285 				if (!(frame.tf_eflags & PSL_T))
286 					return;
287 				frame.tf_eflags &= ~PSL_T;
288 				i = SIGTRAP;
289 			}
290 			/* else ucode = emulator_only_knows() XXX */
291 #else	/* MATH_EMULATE || GPL_MATH_EMULATE */
292 			i = SIGFPE;
293 			ucode = FPE_FPU_NP_TRAP;
294 #endif	/* MATH_EMULATE || GPL_MATH_EMULATE */
295 			break;
296 
297 		case T_FPOPFLT:		/* FPU operand fetch fault */
298 			ucode = T_FPOPFLT;
299 			i = SIGILL;
300 			break;
301 		}
302 	} else {
303 		/* kernel trap */
304 
305 		switch (type) {
306 		case T_PAGEFLT:			/* page fault */
307 			(void) trap_pfault(&frame, FALSE);
308 			return;
309 
310 		case T_PROTFLT:		/* general protection fault */
311 		case T_SEGNPFLT:	/* segment not present fault */
312 			/*
313 			 * Invalid segment selectors and out of bounds
314 			 * %eip's and %esp's can be set up in user mode.
315 			 * This causes a fault in kernel mode when the
316 			 * kernel tries to return to user mode.  We want
317 			 * to get this fault so that we can fix the
318 			 * problem here and not have to check all the
319 			 * selectors and pointers when the user changes
320 			 * them.
321 			 */
322 #define	MAYBE_DORETI_FAULT(where, whereto)				\
323 	do {								\
324 		if (frame.tf_eip == (int)where) {			\
325 			frame.tf_eip = (int)whereto;			\
326 			return;						\
327 		}							\
328 	} while (0)
329 
330 			if (intr_nesting_level == 0) {
331 				MAYBE_DORETI_FAULT(doreti_iret,
332 						   doreti_iret_fault);
333 				MAYBE_DORETI_FAULT(doreti_popl_ds,
334 						   doreti_popl_ds_fault);
335 				MAYBE_DORETI_FAULT(doreti_popl_es,
336 						   doreti_popl_es_fault);
337 			}
338 			if (curpcb && curpcb->pcb_onfault) {
339 				frame.tf_eip = (int)curpcb->pcb_onfault;
340 				return;
341 			}
342 			break;
343 
344 		case T_TSSFLT:
345 			/*
346 			 * PSL_NT can be set in user mode and isn't cleared
347 			 * automatically when the kernel is entered.  This
348 			 * causes a TSS fault when the kernel attempts to
349 			 * `iret' because the TSS link is uninitialized.  We
350 			 * want to get this fault so that we can fix the
351 			 * problem here and not every time the kernel is
352 			 * entered.
353 			 */
354 			if (frame.tf_eflags & PSL_NT) {
355 				frame.tf_eflags &= ~PSL_NT;
356 				return;
357 			}
358 			break;
359 
360 		case T_TRCTRAP:	 /* trace trap */
361 			if (frame.tf_eip == (int)IDTVEC(syscall)) {
362 				/*
363 				 * We've just entered system mode via the
364 				 * syscall lcall.  Continue single stepping
365 				 * silently until the syscall handler has
366 				 * saved the flags.
367 				 */
368 				return;
369 			}
370 			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
371 				/*
372 				 * The syscall handler has now saved the
373 				 * flags.  Stop single stepping it.
374 				 */
375 				frame.tf_eflags &= ~PSL_T;
376 				return;
377 			}
378 			/*
379 			 * Fall through.
380 			 */
381 		case T_BPTFLT:
382 			/*
383 			 * If DDB is enabled, let it handle the debugger trap.
384 			 * Otherwise, debugger traps "can't happen".
385 			 */
386 #ifdef DDB
387 			if (kdb_trap (type, 0, &frame))
388 				return;
389 #endif
390 			break;
391 
392 #if NISA > 0
393 		case T_NMI:
394 #ifdef POWERFAIL_NMI
395 #ifndef TIMER_FREQ
396 #  define TIMER_FREQ 1193182
397 #endif
398 	handle_powerfail:
399 		{
400 		  static unsigned lastalert = 0;
401 
402 		  if(time.tv_sec - lastalert > 10)
403 		    {
404 		      log(LOG_WARNING, "NMI: power fail\n");
405 		      sysbeep(TIMER_FREQ/880, hz);
406 		      lastalert = time.tv_sec;
407 		    }
408 		  return;
409 		}
410 #else /* !POWERFAIL_NMI */
411 #ifdef DDB
412 			/* NMI can be hooked up to a pushbutton for debugging */
413 			printf ("NMI ... going to debugger\n");
414 			if (kdb_trap (type, 0, &frame))
415 				return;
416 #endif /* DDB */
417 			/* machine/parity/power fail/"kitchen sink" faults */
418 			if (isa_nmi(code) == 0) return;
419 			/* FALL THROUGH */
420 #endif /* POWERFAIL_NMI */
421 #endif /* NISA > 0 */
422 		}
423 
424 		trap_fatal(&frame);
425 		return;
426 	}
427 
428 	trapsignal(p, i, ucode);
429 
430 #ifdef DEBUG
431 	eva = rcr2();
432 	if (type <= MAX_TRAP_MSG) {
433 		uprintf("fatal process exception: %s",
434 			trap_msg[type]);
435 		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
436 			uprintf(", fault VA = 0x%x", eva);
437 		uprintf("\n");
438 	}
439 #endif
440 
441 out:
442 	userret(p, &frame, sticks);
443 }
444 
#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 *
 * Resolve a page fault described by `frame'.  `usermode' is nonzero
 * when the fault was taken from user mode.
 *
 * Returns 0 when the fault was resolved (or, for a kernel-mode fault,
 * when execution was redirected to curpcb->pcb_onfault), -1 after
 * trap_fatal() has been called, and otherwise SIGBUS or SIGSEGV as the
 * signal to deliver, with frame->tf_err overwritten by the faulting
 * address.
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	/* %cr2 holds the faulting virtual address. */
	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va < VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t v;

		/*
		 * A kernel-mode fault on a user address is only tolerated
		 * when a pcb_onfault recovery address is registered.
		 */
		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		    (curpcb == NULL || curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 *	fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif
565 
566 int
567 trap_pfault(frame, usermode)
568 	struct trapframe *frame;
569 	int usermode;
570 {
571 	vm_offset_t va;
572 	struct vmspace *vm = NULL;
573 	vm_map_t map = 0;
574 	int rv = 0;
575 	vm_prot_t ftype;
576 	int eva;
577 	struct proc *p = curproc;
578 
579 	eva = rcr2();
580 	va = trunc_page((vm_offset_t)eva);
581 
582 	if (va >= KERNBASE) {
583 		/*
584 		 * Don't allow user-mode faults in kernel address space.
585 		 */
586 		if (usermode)
587 			goto nogo;
588 
589 		map = kernel_map;
590 	} else {
591 		/*
592 		 * This is a fault on non-kernel virtual memory.
593 		 * vm is initialized above to NULL. If curproc is NULL
594 		 * or curproc->p_vmspace is NULL the fault is fatal.
595 		 */
596 		if (p != NULL)
597 			vm = p->p_vmspace;
598 
599 		if (vm == NULL)
600 			goto nogo;
601 
602 		map = &vm->vm_map;
603 	}
604 
605 	if (frame->tf_err & PGEX_W)
606 		ftype = VM_PROT_READ | VM_PROT_WRITE;
607 	else
608 		ftype = VM_PROT_READ;
609 
610 	if (map != kernel_map) {
611 		vm_offset_t v;
612 		vm_page_t ptepg;
613 
614 		/*
615 		 * Keep swapout from messing with us during this
616 		 *	critical time.
617 		 */
618 		++p->p_lock;
619 
620 		/*
621 		 * Grow the stack if necessary
622 		 */
623 		if ((caddr_t)va > vm->vm_maxsaddr
624 		    && (caddr_t)va < (caddr_t)USRSTACK) {
625 			if (!grow(p, va)) {
626 				rv = KERN_FAILURE;
627 				--p->p_lock;
628 				goto nogo;
629 			}
630 		}
631 
632 		/*
633 		 * Check if page table is mapped, if not,
634 		 *	fault it first
635 		 */
636 		v = (vm_offset_t) vtopte(va);
637 
638 		/* Fault the pte only if needed: */
639 		if (*((int *)vtopte(v)) == 0)
640 			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);
641 
642 		pmap_use_pt( vm_map_pmap(map), va);
643 
644 		/* Fault in the user page: */
645 		rv = vm_fault(map, va, ftype, FALSE);
646 
647 		pmap_unuse_pt( vm_map_pmap(map), va);
648 
649 		--p->p_lock;
650 	} else {
651 		/*
652 		 * Since we know that kernel virtual address addresses
653 		 * always have pte pages mapped, we just have to fault
654 		 * the page.
655 		 */
656 		rv = vm_fault(map, va, ftype, FALSE);
657 	}
658 
659 	if (rv == KERN_SUCCESS)
660 		return (0);
661 nogo:
662 	if (!usermode) {
663 		if (curpcb && curpcb->pcb_onfault) {
664 			frame->tf_eip = (int)curpcb->pcb_onfault;
665 			return (0);
666 		}
667 		trap_fatal(frame);
668 		return (-1);
669 	}
670 
671 	/* kludge to pass faulting virtual address to sendsig */
672 	frame->tf_err = eva;
673 
674 	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
675 }
676 
677 void
678 trap_fatal(frame)
679 	struct trapframe *frame;
680 {
681 	int code, type, eva;
682 	struct soft_segment_descriptor softseg;
683 
684 	code = frame->tf_err;
685 	type = frame->tf_trapno;
686 	eva = rcr2();
687 	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
688 
689 	if (type <= MAX_TRAP_MSG)
690 		printf("\n\nFatal trap %d: %s while in %s mode\n",
691 			type, trap_msg[type],
692 			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
693 	if (type == T_PAGEFLT) {
694 		printf("fault virtual address	= 0x%x\n", eva);
695 		printf("fault code		= %s %s, %s\n",
696 			code & PGEX_U ? "user" : "supervisor",
697 			code & PGEX_W ? "write" : "read",
698 			code & PGEX_P ? "protection violation" : "page not present");
699 	}
700 	printf("instruction pointer	= 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip);
701 	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
702 	    softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
703 	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
704 	    softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran);
705 	printf("processor eflags	= ");
706 	if (frame->tf_eflags & PSL_T)
707 		printf("trace/trap, ");
708 	if (frame->tf_eflags & PSL_I)
709 		printf("interrupt enabled, ");
710 	if (frame->tf_eflags & PSL_NT)
711 		printf("nested task, ");
712 	if (frame->tf_eflags & PSL_RF)
713 		printf("resume, ");
714 	if (frame->tf_eflags & PSL_VM)
715 		printf("vm86, ");
716 	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
717 	printf("current process		= ");
718 	if (curproc) {
719 		printf("%lu (%s)\n",
720 		    (u_long)curproc->p_pid, curproc->p_comm ?
721 		    curproc->p_comm : "");
722 	} else {
723 		printf("Idle\n");
724 	}
725 	printf("interrupt mask		= ");
726 	if ((cpl & net_imask) == net_imask)
727 		printf("net ");
728 	if ((cpl & tty_imask) == tty_imask)
729 		printf("tty ");
730 	if ((cpl & bio_imask) == bio_imask)
731 		printf("bio ");
732 	if (cpl == 0)
733 		printf("none");
734 	printf("\n");
735 
736 #ifdef KDB
737 	if (kdb_trap(&psl))
738 		return;
739 #endif
740 #ifdef DDB
741 	if (kdb_trap (type, 0, frame))
742 		return;
743 #endif
744 	if (type <= MAX_TRAP_MSG)
745 		panic(trap_msg[type]);
746 	else
747 		panic("unknown/reserved trap");
748 }
749 
750 /*
751  * Compensate for 386 brain damage (missing URKR).
752  * This is a little simpler than the pagefault handler in trap() because
753  * it the page tables have already been faulted in and high addresses
754  * are thrown out early for other reasons.
755  */
756 int trapwrite(addr)
757 	unsigned addr;
758 {
759 	struct proc *p;
760 	vm_offset_t va, v;
761 	struct vmspace *vm;
762 	int rv;
763 
764 	va = trunc_page((vm_offset_t)addr);
765 	/*
766 	 * XXX - MAX is END.  Changed > to >= for temp. fix.
767 	 */
768 	if (va >= VM_MAXUSER_ADDRESS)
769 		return (1);
770 
771 	p = curproc;
772 	vm = p->p_vmspace;
773 
774 	++p->p_lock;
775 
776 	if ((caddr_t)va >= vm->vm_maxsaddr
777 	    && (caddr_t)va < (caddr_t)USRSTACK) {
778 		if (!grow(p, va)) {
779 			--p->p_lock;
780 			return (1);
781 		}
782 	}
783 
784 	v = trunc_page(vtopte(va));
785 
786 	/*
787 	 * wire the pte page
788 	 */
789 	if (va < USRSTACK) {
790 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
791 	}
792 
793 	/*
794 	 * fault the data page
795 	 */
796 	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
797 
798 	/*
799 	 * unwire the pte page
800 	 */
801 	if (va < USRSTACK) {
802 		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
803 	}
804 
805 	--p->p_lock;
806 
807 	if (rv != KERN_SUCCESS)
808 		return 1;
809 
810 	return (0);
811 }
812 
813 /*
814  * System call request from POSIX system call gate interface to kernel.
815  * Like trap(), argument is call by reference.
816  */
817 void
818 syscall(frame)
819 	struct trapframe frame;
820 {
821 	caddr_t params;
822 	int i;
823 	struct sysent *callp;
824 	struct proc *p = curproc;
825 	u_quad_t sticks;
826 	int error;
827 	int args[8], rval[2];
828 	u_int code;
829 
830 	sticks = p->p_sticks;
831 	if (ISPL(frame.tf_cs) != SEL_UPL)
832 		panic("syscall");
833 
834 	p->p_md.md_regs = (int *)&frame;
835 	params = (caddr_t)frame.tf_esp + sizeof(int);
836 	code = frame.tf_eax;
837 	/*
838 	 * Need to check if this is a 32 bit or 64 bit syscall.
839 	 */
840 	if (code == SYS_syscall) {
841 		/*
842 		 * Code is first argument, followed by actual args.
843 		 */
844 		code = fuword(params);
845 		params += sizeof(int);
846 	} else if (code == SYS___syscall) {
847 		/*
848 		 * Like syscall, but code is a quad, so as to maintain
849 		 * quad alignment for the rest of the arguments.
850 		 */
851 		code = fuword(params);
852 		params += sizeof(quad_t);
853 	}
854 
855  	if (p->p_sysent->sv_mask)
856  		code &= p->p_sysent->sv_mask;
857 
858  	if (code >= p->p_sysent->sv_size)
859  		callp = &p->p_sysent->sv_table[0];
860   	else
861  		callp = &p->p_sysent->sv_table[code];
862 
863 	if ((i = callp->sy_narg * sizeof(int)) &&
864 	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
865 #ifdef KTRACE
866 		if (KTRPOINT(p, KTR_SYSCALL))
867 			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
868 #endif
869 		goto bad;
870 	}
871 #ifdef KTRACE
872 	if (KTRPOINT(p, KTR_SYSCALL))
873 		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
874 #endif
875 	rval[0] = 0;
876 	rval[1] = frame.tf_edx;
877 
878 	error = (*callp->sy_call)(p, args, rval);
879 
880 	switch (error) {
881 
882 	case 0:
883 		/*
884 		 * Reinitialize proc pointer `p' as it may be different
885 		 * if this is a child returning from fork syscall.
886 		 */
887 		p = curproc;
888 		frame.tf_eax = rval[0];
889 		frame.tf_edx = rval[1];
890 		frame.tf_eflags &= ~PSL_C;
891 		break;
892 
893 	case ERESTART:
894 		/*
895 		 * Reconstruct pc, assuming lcall $X,y is 7 bytes.
896 		 */
897 		frame.tf_eip -= 7;
898 		break;
899 
900 	case EJUSTRETURN:
901 		break;
902 
903 	default:
904 bad:
905  		if (p->p_sysent->sv_errsize)
906  			if (error >= p->p_sysent->sv_errsize)
907   				error = -1;	/* XXX */
908    			else
909   				error = p->p_sysent->sv_errtbl[error];
910 		frame.tf_eax = error;
911 		frame.tf_eflags |= PSL_C;
912 		break;
913 	}
914 
915 	if (frame.tf_eflags & PSL_T) {
916 		/* Traced syscall. */
917 		frame.tf_eflags &= ~PSL_T;
918 		trapsignal(p, SIGTRAP, 0);
919 	}
920 
921 	userret(p, &frame, sticks);
922 
923 #ifdef KTRACE
924 	if (KTRPOINT(p, KTR_SYSRET))
925 		ktrsysret(p->p_tracep, code, error, rval[0]);
926 #endif
927 }
928 
#ifdef COMPAT_LINUX
/*
 * System call entry for Linux-emulated processes.
 *
 * Unlike syscall(), the (up to five) arguments are taken from the saved
 * registers %ebx, %ecx, %edx, %esi and %edi rather than copied in from
 * the user stack, and an error is returned to the process as a negated
 * value in %eax.
 */
void
linux_syscall(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	struct sysent *callp;
	u_quad_t sticks;
	int error;
	int rval[2];
	u_int code;
	struct linux_syscall_args {
		int arg1;
		int arg2;
		int arg3;
		int arg4;
		int arg5;
	} args;

	args.arg1 = frame.tf_ebx;
	args.arg2 = frame.tf_ecx;
	args.arg3 = frame.tf_edx;
	args.arg4 = frame.tf_esi;
	args.arg5 = frame.tf_edi;

	sticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("linux syscall");

	p->p_md.md_regs = (int *)&frame;
	code = frame.tf_eax;

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	/* Out-of-range numbers are dispatched to table entry 0. */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, (int *)&args);
#endif

	rval[0] = 0;

	error = (*callp->sy_call)(p, &args, rval);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_eflags &= ~PSL_C;
		break;

	case ERESTART:
		/* Reconstruct pc, subtract size of int 0x80 */
		frame.tf_eip -= 2;
		break;

	case EJUSTRETURN:
		break;

	default:
		/*
		 * Translate the error number through the emulation's
		 * table when it has one, then hand it back negated.
		 */
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame.tf_eax = -error;
		frame.tf_eflags |= PSL_C;
		break;
	}

	if (frame.tf_eflags & PSL_T) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
#endif /* COMPAT_LINUX */
1024