xref: /freebsd/sys/kern/subr_trap.c (revision 960173b9b283675c49eee318c6ca8c12ecb5d188)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * the University of Utah, and William Jolitz.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
37  *	$Id$
38  */
39 
40 /*
41  * 386 Trap and System call handling
42  */
43 
44 #include "npx.h"
45 #include "machine/cpu.h"
46 #include "machine/psl.h"
47 #include "machine/reg.h"
48 
49 #include "param.h"
50 #include "systm.h"
51 #include "proc.h"
52 #include "user.h"
53 #include "acct.h"
54 #include "kernel.h"
55 #ifdef KTRACE
56 #include "ktrace.h"
57 #endif
58 
59 #include "vm/vm_param.h"
60 #include "vm/pmap.h"
61 #include "vm/vm_map.h"
62 #include "sys/vmmeter.h"
63 
64 #include "machine/trap.h"
65 
66 #ifdef	__GNUC__
67 
68 /*
69  * The "r" constraint could be "rm" except for fatal bugs in gas.  As usual,
70  * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
 *
 * read_gs() evaluates to the current contents of the %gs segment register
 * as a u_short.  write_gs(gs) loads %gs from its argument after casting it
 * to u_short.  trap() uses the pair to detect and repair a %gs that no
 * longer holds _udatasel.
71  */
72 #define	read_gs()	({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
73 #define	write_gs(gs)	__asm("mov %0,%%gs" : : "r" ((u_short) gs))
74 
75 #else	/* not __GNUC__ */
76 
/*
 * Without GNU inline assembly only prototypes are declared here; the
 * implementations are not visible in this file.
 */
77 u_short	read_gs		__P((void));
78 void	write_gs	__P((/* promoted u_short */ int gs));
79 
80 #endif	/* __GNUC__ */
81 
82 struct	sysent sysent[];	/* system call table, indexed by call number in syscall() */
83 int	nsysent;		/* call numbers >= nsysent are rejected in syscall() */
84 int dostacklimits;		/* nonzero enables the stack rlimit check on stack faults */
85 unsigned rcr2();		/* its value is used as the faulting address in trap() */
86 extern short cpl;		/* printed as "cpl" in the fatal trap message */
87 
88 
89 /*
90  * trap(frame):
91  *	Exception, fault, and trap interface to BSD kernel. This
92  * common code is called from assembly language IDT gate entry
93  * routines that prepare a suitable stack frame, and restore this
94  * frame after the exception has been processed. Note that the
95  * effect is as if the arguments were passed call by reference.
 *
 * Summary of the control flow visible in this function:
 *	- A trap other than a page fault taken while pcb_onfault is set
 *	  either repairs %gs and returns to retry, or resumes at the
 *	  pcb_onfault address.
 *	- Traps from user privilege level are tagged with T_USER; most of
 *	  them select a signal number (i) and a code (ucode), which are
 *	  handed to trapsignal() before the return-to-user work at "out".
 *	- Page faults are resolved through vm_fault().
 *	- Anything unhandled reaches "we_re_toast", which prints the frame
 *	  and panics.
96  */
97 
98 /*ARGSUSED*/
99 trap(frame)
100 	struct trapframe frame;
101 {
102 	register int i;
103 	register struct proc *p = curproc;
104 	struct timeval syst;
105 	int ucode, type, code, eva;
106 
107 	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
108 	type = frame.tf_trapno;
109 #include "ddb.h"
	/*
	 * With a pcb_onfault handler armed, breakpoint and trace traps are
	 * offered to kdb_trap() first; a nonzero return ends trap handling.
	 */
110 #if NDDB > 0
111 	if (curpcb && curpcb->pcb_onfault) {
112 		if (frame.tf_trapno == T_BPTFLT
113 		    || frame.tf_trapno == T_TRCTRAP)
114 			if (kdb_trap (type, 0, &frame))
115 				return;
116 	}
117 #endif
118 
119 /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x",
120 			frame.tf_trapno, frame.tf_err, frame.tf_eip,
121 			frame.tf_cs, rcr2(), frame.tf_esp);*/
/* No pcb or no current process: take the fatal path immediately. */
122 if(curpcb == 0 || curproc == 0) goto we_re_toast;
	/*
	 * Any trap other than a page fault taken while pcb_onfault is set:
	 * either repair %gs and return so the access is retried, or resume
	 * at the pcb_onfault address.  The "copyfault" label sits in the
	 * else arm so that the page fault failure path ("nogo") can jump to
	 * the same recovery statement.
	 */
123 	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
124 		extern int _udatasel;
125 
126 		if (read_gs() != (u_short) _udatasel)
127 			/*
128 			 * Some user has corrupted %gs but we depend on it in
129 			 * copyout() etc.  Fix it up and retry.
130 			 *
131 			 * (We don't preserve %fs or %gs, so users can change
132 			 * them to either _ucodesel, _udatasel or a not-present
133 			 * selector, possibly ORed with 0 to 3, making them
134 			 * volatile for other users.  Not preserving them saves
135 			 * time and doesn't lose functionality or open security
136 			 * holes.)
137 			 */
138 			write_gs(_udatasel);
139 		else
140 copyfault:
141 			frame.tf_eip = (int)curpcb->pcb_onfault;
142 		return;
143 	}
144 
	/* Snapshot of system time; compared with p_stime at "out" for profiling. */
145 	syst = p->p_stime;
	/* Trap came from user privilege level: tag the type, record the frame. */
146 	if (ISPL(frame.tf_cs) == SEL_UPL) {
147 		type |= T_USER;
148 		p->p_regs = (int *)&frame;
149 		curpcb->pcb_flags |= FM_TRAP;	/* used by sendsig */
150 	}
151 
152 	ucode=0;
	/* rcr2()'s value is used below as the faulting address for page faults. */
153 	eva = rcr2();
154 	code = frame.tf_err;
155 	switch (type) {
156 
	/*
	 * Fatal path.  Besides being the default case, "we_re_toast" is
	 * reached by goto from the early sanity check, the page fault code
	 * and the NMI code.  The trap is offered to the configured debugger
	 * hook; if that does not return nonzero the frame is printed and the
	 * kernel panics.
	 * NOTE(review): "psl" is not declared anywhere in this function, so
	 * the KDB variant presumably does not compile -- confirm.
	 */
157 	default:
158 	we_re_toast:
159 #ifdef KDB
160 		if (kdb_trap(&psl))
161 			return;
162 #endif
163 #if NDDB > 0
164 		if (kdb_trap (type, 0, &frame))
165 			return;
166 #endif
167 
168 		printf("trap type %d code = %x eip = %x cs = %x eflags = %x ",
169 			frame.tf_trapno, frame.tf_err, frame.tf_eip,
170 			frame.tf_cs, frame.tf_eflags);
171 	eva = rcr2();
172 		printf("cr2 %x cpl %x\n", eva, cpl);
173 		/* type &= ~T_USER; */ /* XXX what the hell is this */
174 		panic("trap");
175 		/*NOTREACHED*/
176 
177 	case T_SEGNPFLT|T_USER:
178 	case T_STKFLT|T_USER:
179 	case T_PROTFLT|T_USER:		/* protection fault */
180 		ucode = code + BUS_SEGM_FAULT ;
181 		i = SIGBUS;
182 		break;
183 
184 	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
185 	case T_RESADFLT|T_USER:		/* reserved addressing fault */
186 	case T_RESOPFLT|T_USER:		/* reserved operand fault */
187 	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
188 		ucode = type &~ T_USER;
189 		i = SIGILL;
190 		break;
191 
192 	case T_ASTFLT|T_USER:		/* Allow process switch */
193 		astoff();
194 		cnt.v_soft++;
195 		if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
196 			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
197 			p->p_flag &= ~SOWEUPC;
198 		}
199 		goto out;
200 
201 	case T_DNA|T_USER:
202 #if NNPX > 0
203 		/* if a transparent fault (due to context switch "late") */
204 		if (npxdna()) return;
205 #endif	/* NNPX > 0 */
		/*
		 * With MATH_EMULATE, a nonzero return from math_emulate() is
		 * left in i and becomes the signal number handed to
		 * trapsignal() after the switch.
		 */
206 #ifdef	MATH_EMULATE
207 		i = math_emulate(&frame);
208 		if (i == 0) return;
209 #else	/* MATH_EMULTATE */
210 		panic("trap: math emulation necessary!");
211 #endif	/* MATH_EMULTATE */
212 		ucode = FPE_FPU_NP_TRAP;
213 		break;
214 
215 	case T_BOUND|T_USER:
216 		ucode = FPE_SUBRNG_TRAP;
217 		i = SIGFPE;
218 		break;
219 
220 	case T_OFLOW|T_USER:
221 		ucode = FPE_INTOVF_TRAP;
222 		i = SIGFPE;
223 		break;
224 
225 	case T_DIVIDE|T_USER:
226 		ucode = FPE_INTDIV_TRAP;
227 		i = SIGFPE;
228 		break;
229 
230 	case T_ARITHTRAP|T_USER:
231 		ucode = code;
232 		i = SIGFPE;
233 		break;
234 
235 	case T_PAGEFLT:			/* allow page faults in kernel mode */
236 #if 0
237 		/* XXX - check only applies to 386's and 486's with WP off */
238 		if (code & PGEX_P) goto we_re_toast;
239 #endif
240 
241 		/* fall into */
242 	case T_PAGEFLT|T_USER:		/* page fault */
243 	    {
244 		register vm_offset_t va;
245 		register struct vmspace *vm = p->p_vmspace;
246 		register vm_map_t map;
247 		int rv;
248 		vm_prot_t ftype;
249 		extern vm_map_t kernel_map;
250 		unsigned nss,v;
251 
		/* Page-align the faulting address obtained from rcr2(). */
252 		va = trunc_page((vm_offset_t)eva);
253 		/*
254 		 * It is only a kernel address space fault iff:
255 		 * 	1. (type & T_USER) == 0  and
256 		 * 	2. pcb_onfault not set or
257 		 *	3. pcb_onfault set but supervisor space fault
258 		 * The last can occur during an exec() copyin where the
259 		 * argument space is lazy-allocated.
260 		 */
261 		if (type == T_PAGEFLT && va >= KERNBASE)
262 			map = kernel_map;
263 		else
264 			map = &vm->vm_map;
		/* PGEX_W set in the error code requests write access as well. */
265 		if (code & PGEX_W)
266 			ftype = VM_PROT_READ | VM_PROT_WRITE;
267 		else
268 			ftype = VM_PROT_READ;
269 
270 #ifdef DEBUG
271 		if (map == kernel_map && va == 0) {
272 			printf("trap: bad kernel access at %x\n", va);
273 			goto we_re_toast;
274 		}
275 #endif
276 
277 		/*
278 		 * XXX: rude hack to make stack limits "work"
		 *
		 * nss stays 0 unless the fault lies at or above vm_maxsaddr
		 * in a non-kernel map and dostacklimits is set.  In that case
		 * it is the stack size implied by the faulting address, and
		 * the fault is refused if it exceeds the RLIMIT_STACK soft
		 * limit.
279 		 */
280 		nss = 0;
281 		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map
282 			&& dostacklimits) {
283 			nss = clrnd(btoc((unsigned)vm->vm_maxsaddr
284 				+ MAXSSIZ - (unsigned)va));
285 			if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
286 /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/
287 				rv = KERN_FAILURE;
288 				goto nogo;
289 			}
290 		}
291 
292 		/* check if page table is mapped, if not, fault it first */
293 #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
		/*
		 * v remembers the page table page that had to be faulted in
		 * here, or 0 if the page directory entry was already valid.
		 */
294 		if (!pde_v(va)) {
295 			v = trunc_page(vtopte(va));
296 			rv = vm_fault(map, v, ftype, FALSE);
297 			if (rv != KERN_SUCCESS) goto nogo;
298 			/* check if page table fault, increment wiring */
299 			vm_map_pageable(map, v, round_page(v+1), FALSE);
300 		} else v=0;
301 		rv = vm_fault(map, va, ftype, FALSE);
302 		if (rv == KERN_SUCCESS) {
303 			/*
304 			 * XXX: continuation of rude stack hack
305 			 */
306 			if (nss > vm->vm_ssize)
307 				vm->vm_ssize = nss;
308 			va = trunc_page(vtopte(va));
309 			/* for page table, increment wiring
310 			   as long as not a page table fault as well */
311 			if (!v && type != T_PAGEFLT)
312 			  vm_map_pageable(map, va, round_page(va+1), FALSE);
			/* Kernel-mode faults return directly; user faults do the "out" work. */
313 			if (type == T_PAGEFLT)
314 				return;
315 			goto out;
316 		}
		/*
		 * The fault could not be resolved.  For a kernel-mode fault,
		 * recover through pcb_onfault if it is set, otherwise report
		 * and take the fatal path.  For a user-mode fault, leave the
		 * switch with SIGBUS or SIGSEGV selected.
		 */
317 nogo:
318 		if (type == T_PAGEFLT) {
319 			if (curpcb->pcb_onfault)
320 				goto copyfault;
321 			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
322 			       map, va, ftype, rv);
323 			printf("  type %x, code %x\n",
324 			       type, code);
325 			goto we_re_toast;
326 		}
327 		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
328 		break;
329 	    }
330 
331 #if NDDB == 0
332 	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
333 		frame.tf_eflags &= ~PSL_T;
334 
335 			/* Q: how do we turn it on again? */
336 		return;
337 #endif
338 
339 	case T_BPTFLT|T_USER:		/* bpt instruction fault */
340 	case T_TRCTRAP|T_USER:		/* trace trap */
341 		frame.tf_eflags &= ~PSL_T;
342 		i = SIGTRAP;
343 		break;
344 
345 #include "isa.h"
346 #if	NISA > 0
347 	case T_NMI:
348 	case T_NMI|T_USER:
349 #if NDDB > 0
350 		/* NMI can be hooked up to a pushbutton for debugging */
351 		printf ("NMI ... going to debugger\n");
352 		if (kdb_trap (type, 0, &frame))
353 			return;
354 #endif
355 		/* machine/parity/power fail/"kitchen sink" faults */
356 		if(isa_nmi(code) == 0) return;
357 		else goto we_re_toast;
358 #endif
359 	}
360 
	/* Every case that leaves the switch with "break" has set i and ucode. */
361 	trapsignal(p, i, ucode);
362 	if ((type & T_USER) == 0)
363 		return;
	/*
	 * Common return-to-user work: run psig() for each signal CURSIG()
	 * reports, reschedule if want_resched is set, and pass addupc() the
	 * number of ticks of system time accumulated since "syst" was taken.
	 */
364 out:
365 	while (i = CURSIG(p))
366 		psig(i);
367 	p->p_pri = p->p_usrpri;
368 	if (want_resched) {
369 		/*
370 		 * Since we are curproc, clock will normally just change
371 		 * our priority without moving us from one queue to another
372 		 * (since the running process is not on a queue.)
373 		 * If that happened after we setrq ourselves but before we
374 		 * swtch()'ed, we might not be on the queue indicated by
375 		 * our priority.
376 		 */
377 		(void) splclock();
378 		setrq(p);
379 		p->p_stats->p_ru.ru_nivcsw++;
380 		swtch();
381 		(void) splnone();
382 		while (i = CURSIG(p))
383 			psig(i);
384 	}
385 	if (p->p_stats->p_prof.pr_scale) {
386 		int ticks;
387 		struct timeval *tv = &p->p_stime;
388 
389 		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
390 			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
391 		if (ticks) {
392 #ifdef PROFTIMER
393 			extern int profscale;
394 			addupc(frame.tf_eip, &p->p_stats->p_prof,
395 			    ticks * profscale);
396 #else
397 			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
398 #endif
399 		}
400 	}
401 	curpri = p->p_pri;
402 	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
403 }
404 
405 /*
406  * Compensate for 386 brain damage (missing URKR).
407  * This is a little simpler than the pagefault handler in trap() because
408  * it the page tables have already been faulted in and high addresses
409  * are thrown out early for other reasons.
410  */
411 int trapwrite(addr)
412 	unsigned addr;
413 {
414 	unsigned nss;
415 	struct proc *p;
416 	vm_offset_t va;
417 	struct vmspace *vm;
418 
419 	va = trunc_page((vm_offset_t)addr);
420 	/*
421 	 * XXX - MAX is END.  Changed > to >= for temp. fix.
422 	 */
423 	if (va >= VM_MAXUSER_ADDRESS)
424 		return (1);
425 	/*
426 	 * XXX: rude stack hack adapted from trap().
427 	 */
428 	nss = 0;
429 	p = curproc;
430 	vm = p->p_vmspace;
431 	if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) {
432 		nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ
433 				 - (unsigned)va));
434 		if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
435 			return (1);
436 	}
437 
438 	if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE)
439 	    != KERN_SUCCESS)
440 		return (1);
441 
442 	/*
443 	 * XXX: continuation of rude stack hack
444 	 */
445 	if (nss > vm->vm_ssize)
446 		vm->vm_ssize = nss;
447 
448 	return (0);
449 }
450 
451 /*
452  * syscall(frame):
453  *	System call request from POSIX system call gate interface to kernel.
454  * Like trap(), argument is call by reference.
455  */
456 /*ARGSUSED*/
457 syscall(frame)
458 	volatile struct syscframe frame;
459 {
460 	register int *locr0 = ((int *)&frame);
461 	register caddr_t params;
462 	register int i;
463 	register struct sysent *callp;
464 	register struct proc *p = curproc;
465 	struct timeval syst;
466 	int error, opc;
467 	int args[8], rval[2];
468 	int code;
469 
470 #ifdef lint
471 	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
472 #endif
473 	syst = p->p_stime;
474 	if (ISPL(frame.sf_cs) != SEL_UPL)
475 		panic("syscall");
476 
477 	code = frame.sf_eax;
478 	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
479 	p->p_regs = (int *)&frame;
480 	params = (caddr_t)frame.sf_esp + sizeof (int) ;
481 
482 	/*
483 	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
484 	 */
485 	opc = frame.sf_eip - 7;
486 	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
487 	if (callp == sysent) {
488 		i = fuword(params);
489 		params += sizeof (int);
490 		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
491 	}
492 
493 	if ((i = callp->sy_narg * sizeof (int)) &&
494 	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
495 		frame.sf_eax = error;
496 		frame.sf_eflags |= PSL_C;	/* carry bit */
497 #ifdef KTRACE
498 		if (KTRPOINT(p, KTR_SYSCALL))
499 			ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
500 #endif
501 		goto done;
502 	}
503 #ifdef KTRACE
504 	if (KTRPOINT(p, KTR_SYSCALL))
505 		ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
506 #endif
507 	rval[0] = 0;
508 	rval[1] = frame.sf_edx;
509 /*pg("%d. s %d\n", p->p_pid, code);*/
510 	error = (*callp->sy_call)(p, args, rval);
511 	if (error == ERESTART)
512 		frame.sf_eip = opc;
513 	else if (error != EJUSTRETURN) {
514 		if (error) {
515 /*pg("error %d", error);*/
516 			frame.sf_eax = error;
517 			frame.sf_eflags |= PSL_C;	/* carry bit */
518 		} else {
519 			frame.sf_eax = rval[0];
520 			frame.sf_edx = rval[1];
521 			frame.sf_eflags &= ~PSL_C;	/* carry bit */
522 		}
523 	}
524 	/* else if (error == EJUSTRETURN) */
525 		/* nothing to do */
526 done:
527 	/*
528 	 * Reinitialize proc pointer `p' as it may be different
529 	 * if this is a child returning from fork syscall.
530 	 */
531 	p = curproc;
532 	while (i = CURSIG(p))
533 		psig(i);
534 	p->p_pri = p->p_usrpri;
535 	if (want_resched) {
536 		/*
537 		 * Since we are curproc, clock will normally just change
538 		 * our priority without moving us from one queue to another
539 		 * (since the running process is not on a queue.)
540 		 * If that happened after we setrq ourselves but before we
541 		 * swtch()'ed, we might not be on the queue indicated by
542 		 * our priority.
543 		 */
544 		(void) splclock();
545 		setrq(p);
546 		p->p_stats->p_ru.ru_nivcsw++;
547 		swtch();
548 		(void) splnone();
549 		while (i = CURSIG(p))
550 			psig(i);
551 	}
552 	if (p->p_stats->p_prof.pr_scale) {
553 		int ticks;
554 		struct timeval *tv = &p->p_stime;
555 
556 		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
557 			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
558 		if (ticks) {
559 #ifdef PROFTIMER
560 			extern int profscale;
561 			addupc(frame.sf_eip, &p->p_stats->p_prof,
562 			    ticks * profscale);
563 #else
564 			addupc(frame.sf_eip, &p->p_stats->p_prof, ticks);
565 #endif
566 		}
567 	}
568 	curpri = p->p_pri;
569 #ifdef KTRACE
570 	if (KTRPOINT(p, KTR_SYSRET))
571 		ktrsysret(p->p_tracep, code, error, rval[0]);
572 #endif
573 #ifdef	DIAGNOSTICx
574 { extern int _udatasel, _ucodesel;
575 	if (frame.sf_ss != _udatasel)
576 		printf("ss %x call %d\n", frame.sf_ss, code);
577 	if ((frame.sf_cs&0xffff) != _ucodesel)
578 		printf("cs %x call %d\n", frame.sf_cs, code);
579 	if (frame.sf_eip > VM_MAXUSER_ADDRESS) {
580 		printf("eip %x call %d\n", frame.sf_eip, code);
581 		frame.sf_eip = 0;
582 	}
583 }
584 #endif
585 }
586