1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * the University of Utah, and William Jolitz. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 37 * $Id$ 38 */ 39 40 /* 41 * 386 Trap and System call handleing 42 */ 43 44 #include "npx.h" 45 #include "machine/cpu.h" 46 #include "machine/psl.h" 47 #include "machine/reg.h" 48 49 #include "param.h" 50 #include "systm.h" 51 #include "proc.h" 52 #include "user.h" 53 #include "acct.h" 54 #include "kernel.h" 55 #ifdef KTRACE 56 #include "ktrace.h" 57 #endif 58 59 #include "vm/vm_param.h" 60 #include "vm/pmap.h" 61 #include "vm/vm_map.h" 62 #include "sys/vmmeter.h" 63 64 #include "machine/trap.h" 65 66 #ifdef __GNUC__ 67 68 /* 69 * The "r" contraint could be "rm" except for fatal bugs in gas. As usual, 70 * we omit the size from the mov instruction to avoid nonfatal bugs in gas. 71 */ 72 #define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; }) 73 #define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs)) 74 75 #else /* not __GNUC__ */ 76 77 u_short read_gs __P((void)); 78 void write_gs __P((/* promoted u_short */ int gs)); 79 80 #endif /* __GNUC__ */ 81 82 struct sysent sysent[]; 83 int nsysent; 84 int dostacklimits; 85 unsigned rcr2(); 86 extern short cpl; 87 88 89 /* 90 * trap(frame): 91 * Exception, fault, and trap interface to BSD kernel. This 92 * common code is called from assembly language IDT gate entry 93 * routines that prepare a suitable stack frame, and restore this 94 * frame after the exception has been processed. Note that the 95 * effect is as if the arguments were passed call by reference. 96 */ 97 98 /*ARGSUSED*/ 99 trap(frame) 100 struct trapframe frame; 101 { 102 register int i; 103 register struct proc *p = curproc; 104 struct timeval syst; 105 int ucode, type, code, eva; 106 107 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ 108 type = frame.tf_trapno; 109 #include "ddb.h" 110 #if NDDB > 0 111 if (curpcb && curpcb->pcb_onfault) { 112 if (frame.tf_trapno == T_BPTFLT 113 || frame.tf_trapno == T_TRCTRAP) 114 if (kdb_trap (type, 0, &frame)) 115 return; 116 } 117 #endif 118 119 /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x", 120 frame.tf_trapno, frame.tf_err, frame.tf_eip, 121 frame.tf_cs, rcr2(), frame.tf_esp);*/ 122 if(curpcb == 0 || curproc == 0) goto we_re_toast; 123 if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) { 124 extern int _udatasel; 125 126 if (read_gs() != (u_short) _udatasel) 127 /* 128 * Some user has corrupted %gs but we depend on it in 129 * copyout() etc. Fix it up and retry. 130 * 131 * (We don't preserve %fs or %gs, so users can change 132 * them to either _ucodesel, _udatasel or a not-present 133 * selector, possibly ORed with 0 to 3, making them 134 * volatile for other users. Not preserving them saves 135 * time and doesn't lose functionality or open security 136 * holes.) 137 */ 138 write_gs(_udatasel); 139 else 140 copyfault: 141 frame.tf_eip = (int)curpcb->pcb_onfault; 142 return; 143 } 144 145 syst = p->p_stime; 146 if (ISPL(frame.tf_cs) == SEL_UPL) { 147 type |= T_USER; 148 p->p_regs = (int *)&frame; 149 curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ 150 } 151 152 ucode=0; 153 eva = rcr2(); 154 code = frame.tf_err; 155 switch (type) { 156 157 default: 158 we_re_toast: 159 #ifdef KDB 160 if (kdb_trap(&psl)) 161 return; 162 #endif 163 #if NDDB > 0 164 if (kdb_trap (type, 0, &frame)) 165 return; 166 #endif 167 168 printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", 169 frame.tf_trapno, frame.tf_err, frame.tf_eip, 170 frame.tf_cs, frame.tf_eflags); 171 eva = rcr2(); 172 printf("cr2 %x cpl %x\n", eva, cpl); 173 /* type &= ~T_USER; */ /* XXX what the hell is this */ 174 panic("trap"); 175 /*NOTREACHED*/ 176 177 case T_SEGNPFLT|T_USER: 178 case T_STKFLT|T_USER: 179 case T_PROTFLT|T_USER: /* protection fault */ 180 ucode = code + BUS_SEGM_FAULT ; 181 i = SIGBUS; 182 break; 183 184 case T_PRIVINFLT|T_USER: /* privileged instruction fault */ 185 case T_RESADFLT|T_USER: /* reserved addressing fault */ 186 case T_RESOPFLT|T_USER: /* reserved operand fault */ 187 case T_FPOPFLT|T_USER: /* coprocessor operand fault */ 188 ucode = type &~ T_USER; 189 i = SIGILL; 190 break; 191 192 case T_ASTFLT|T_USER: /* Allow process switch */ 193 astoff(); 194 cnt.v_soft++; 195 if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { 196 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 197 p->p_flag &= ~SOWEUPC; 198 } 199 goto out; 200 201 case T_DNA|T_USER: 202 #if NNPX > 0 203 /* if a transparent fault (due to context switch "late") */ 204 if (npxdna()) return; 205 #endif /* NNPX > 0 */ 206 #ifdef MATH_EMULATE 207 i = math_emulate(&frame); 208 if (i == 0) return; 209 #else /* MATH_EMULTATE */ 210 panic("trap: math emulation necessary!"); 211 #endif /* MATH_EMULTATE */ 212 ucode = FPE_FPU_NP_TRAP; 213 break; 214 215 case T_BOUND|T_USER: 216 ucode = FPE_SUBRNG_TRAP; 217 i = SIGFPE; 218 break; 219 220 case T_OFLOW|T_USER: 221 ucode = FPE_INTOVF_TRAP; 222 i = SIGFPE; 223 break; 224 225 case T_DIVIDE|T_USER: 226 ucode = FPE_INTDIV_TRAP; 227 i = SIGFPE; 228 break; 229 230 case T_ARITHTRAP|T_USER: 231 ucode = code; 232 i = SIGFPE; 233 break; 234 235 case T_PAGEFLT: /* allow page faults in kernel mode */ 236 #if 0 237 /* XXX - check only applies to 386's and 486's with WP off */ 238 if (code & PGEX_P) goto we_re_toast; 239 #endif 240 241 /* fall into */ 242 case T_PAGEFLT|T_USER: /* page fault */ 243 { 244 register vm_offset_t va; 245 register struct vmspace *vm = p->p_vmspace; 246 register vm_map_t map; 247 int rv; 248 vm_prot_t ftype; 249 extern vm_map_t kernel_map; 250 unsigned nss,v; 251 252 va = trunc_page((vm_offset_t)eva); 253 /* 254 * It is only a kernel address space fault iff: 255 * 1. (type & T_USER) == 0 and 256 * 2. pcb_onfault not set or 257 * 3. pcb_onfault set but supervisor space fault 258 * The last can occur during an exec() copyin where the 259 * argument space is lazy-allocated. 260 */ 261 if (type == T_PAGEFLT && va >= KERNBASE) 262 map = kernel_map; 263 else 264 map = &vm->vm_map; 265 if (code & PGEX_W) 266 ftype = VM_PROT_READ | VM_PROT_WRITE; 267 else 268 ftype = VM_PROT_READ; 269 270 #ifdef DEBUG 271 if (map == kernel_map && va == 0) { 272 printf("trap: bad kernel access at %x\n", va); 273 goto we_re_toast; 274 } 275 #endif 276 277 /* 278 * XXX: rude hack to make stack limits "work" 279 */ 280 nss = 0; 281 if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map 282 && dostacklimits) { 283 nss = clrnd(btoc((unsigned)vm->vm_maxsaddr 284 + MAXSSIZ - (unsigned)va)); 285 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 286 /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/ 287 rv = KERN_FAILURE; 288 goto nogo; 289 } 290 } 291 292 /* check if page table is mapped, if not, fault it first */ 293 #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) 294 if (!pde_v(va)) { 295 v = trunc_page(vtopte(va)); 296 rv = vm_fault(map, v, ftype, FALSE); 297 if (rv != KERN_SUCCESS) goto nogo; 298 /* check if page table fault, increment wiring */ 299 vm_map_pageable(map, v, round_page(v+1), FALSE); 300 } else v=0; 301 rv = vm_fault(map, va, ftype, FALSE); 302 if (rv == KERN_SUCCESS) { 303 /* 304 * XXX: continuation of rude stack hack 305 */ 306 if (nss > vm->vm_ssize) 307 vm->vm_ssize = nss; 308 va = trunc_page(vtopte(va)); 309 /* for page table, increment wiring 310 as long as not a page table fault as well */ 311 if (!v && type != T_PAGEFLT) 312 vm_map_pageable(map, va, round_page(va+1), FALSE); 313 if (type == T_PAGEFLT) 314 return; 315 goto out; 316 } 317 nogo: 318 if (type == T_PAGEFLT) { 319 if (curpcb->pcb_onfault) 320 goto copyfault; 321 printf("vm_fault(%x, %x, %x, 0) -> %x\n", 322 map, va, ftype, rv); 323 printf(" type %x, code %x\n", 324 type, code); 325 goto we_re_toast; 326 } 327 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; 328 break; 329 } 330 331 #if NDDB == 0 332 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 333 frame.tf_eflags &= ~PSL_T; 334 335 /* Q: how do we turn it on again? */ 336 return; 337 #endif 338 339 case T_BPTFLT|T_USER: /* bpt instruction fault */ 340 case T_TRCTRAP|T_USER: /* trace trap */ 341 frame.tf_eflags &= ~PSL_T; 342 i = SIGTRAP; 343 break; 344 345 #include "isa.h" 346 #if NISA > 0 347 case T_NMI: 348 case T_NMI|T_USER: 349 #if NDDB > 0 350 /* NMI can be hooked up to a pushbutton for debugging */ 351 printf ("NMI ... going to debugger\n"); 352 if (kdb_trap (type, 0, &frame)) 353 return; 354 #endif 355 /* machine/parity/power fail/"kitchen sink" faults */ 356 if(isa_nmi(code) == 0) return; 357 else goto we_re_toast; 358 #endif 359 } 360 361 trapsignal(p, i, ucode); 362 if ((type & T_USER) == 0) 363 return; 364 out: 365 while (i = CURSIG(p)) 366 psig(i); 367 p->p_pri = p->p_usrpri; 368 if (want_resched) { 369 /* 370 * Since we are curproc, clock will normally just change 371 * our priority without moving us from one queue to another 372 * (since the running process is not on a queue.) 373 * If that happened after we setrq ourselves but before we 374 * swtch()'ed, we might not be on the queue indicated by 375 * our priority. 376 */ 377 (void) splclock(); 378 setrq(p); 379 p->p_stats->p_ru.ru_nivcsw++; 380 swtch(); 381 (void) splnone(); 382 while (i = CURSIG(p)) 383 psig(i); 384 } 385 if (p->p_stats->p_prof.pr_scale) { 386 int ticks; 387 struct timeval *tv = &p->p_stime; 388 389 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 390 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 391 if (ticks) { 392 #ifdef PROFTIMER 393 extern int profscale; 394 addupc(frame.tf_eip, &p->p_stats->p_prof, 395 ticks * profscale); 396 #else 397 addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); 398 #endif 399 } 400 } 401 curpri = p->p_pri; 402 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 403 } 404 405 /* 406 * Compensate for 386 brain damage (missing URKR). 407 * This is a little simpler than the pagefault handler in trap() because 408 * it the page tables have already been faulted in and high addresses 409 * are thrown out early for other reasons. 410 */ 411 int trapwrite(addr) 412 unsigned addr; 413 { 414 unsigned nss; 415 struct proc *p; 416 vm_offset_t va; 417 struct vmspace *vm; 418 419 va = trunc_page((vm_offset_t)addr); 420 /* 421 * XXX - MAX is END. Changed > to >= for temp. fix. 422 */ 423 if (va >= VM_MAXUSER_ADDRESS) 424 return (1); 425 /* 426 * XXX: rude stack hack adapted from trap(). 427 */ 428 nss = 0; 429 p = curproc; 430 vm = p->p_vmspace; 431 if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { 432 nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ 433 - (unsigned)va)); 434 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) 435 return (1); 436 } 437 438 if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) 439 != KERN_SUCCESS) 440 return (1); 441 442 /* 443 * XXX: continuation of rude stack hack 444 */ 445 if (nss > vm->vm_ssize) 446 vm->vm_ssize = nss; 447 448 return (0); 449 } 450 451 /* 452 * syscall(frame): 453 * System call request from POSIX system call gate interface to kernel. 454 * Like trap(), argument is call by reference. 455 */ 456 /*ARGSUSED*/ 457 syscall(frame) 458 volatile struct syscframe frame; 459 { 460 register int *locr0 = ((int *)&frame); 461 register caddr_t params; 462 register int i; 463 register struct sysent *callp; 464 register struct proc *p = curproc; 465 struct timeval syst; 466 int error, opc; 467 int args[8], rval[2]; 468 int code; 469 470 #ifdef lint 471 r0 = 0; r0 = r0; r1 = 0; r1 = r1; 472 #endif 473 syst = p->p_stime; 474 if (ISPL(frame.sf_cs) != SEL_UPL) 475 panic("syscall"); 476 477 code = frame.sf_eax; 478 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 479 p->p_regs = (int *)&frame; 480 params = (caddr_t)frame.sf_esp + sizeof (int) ; 481 482 /* 483 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 484 */ 485 opc = frame.sf_eip - 7; 486 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 487 if (callp == sysent) { 488 i = fuword(params); 489 params += sizeof (int); 490 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 491 } 492 493 if ((i = callp->sy_narg * sizeof (int)) && 494 (error = copyin(params, (caddr_t)args, (u_int)i))) { 495 frame.sf_eax = error; 496 frame.sf_eflags |= PSL_C; /* carry bit */ 497 #ifdef KTRACE 498 if (KTRPOINT(p, KTR_SYSCALL)) 499 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 500 #endif 501 goto done; 502 } 503 #ifdef KTRACE 504 if (KTRPOINT(p, KTR_SYSCALL)) 505 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 506 #endif 507 rval[0] = 0; 508 rval[1] = frame.sf_edx; 509 /*pg("%d. s %d\n", p->p_pid, code);*/ 510 error = (*callp->sy_call)(p, args, rval); 511 if (error == ERESTART) 512 frame.sf_eip = opc; 513 else if (error != EJUSTRETURN) { 514 if (error) { 515 /*pg("error %d", error);*/ 516 frame.sf_eax = error; 517 frame.sf_eflags |= PSL_C; /* carry bit */ 518 } else { 519 frame.sf_eax = rval[0]; 520 frame.sf_edx = rval[1]; 521 frame.sf_eflags &= ~PSL_C; /* carry bit */ 522 } 523 } 524 /* else if (error == EJUSTRETURN) */ 525 /* nothing to do */ 526 done: 527 /* 528 * Reinitialize proc pointer `p' as it may be different 529 * if this is a child returning from fork syscall. 530 */ 531 p = curproc; 532 while (i = CURSIG(p)) 533 psig(i); 534 p->p_pri = p->p_usrpri; 535 if (want_resched) { 536 /* 537 * Since we are curproc, clock will normally just change 538 * our priority without moving us from one queue to another 539 * (since the running process is not on a queue.) 540 * If that happened after we setrq ourselves but before we 541 * swtch()'ed, we might not be on the queue indicated by 542 * our priority. 543 */ 544 (void) splclock(); 545 setrq(p); 546 p->p_stats->p_ru.ru_nivcsw++; 547 swtch(); 548 (void) splnone(); 549 while (i = CURSIG(p)) 550 psig(i); 551 } 552 if (p->p_stats->p_prof.pr_scale) { 553 int ticks; 554 struct timeval *tv = &p->p_stime; 555 556 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 557 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 558 if (ticks) { 559 #ifdef PROFTIMER 560 extern int profscale; 561 addupc(frame.sf_eip, &p->p_stats->p_prof, 562 ticks * profscale); 563 #else 564 addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); 565 #endif 566 } 567 } 568 curpri = p->p_pri; 569 #ifdef KTRACE 570 if (KTRPOINT(p, KTR_SYSRET)) 571 ktrsysret(p->p_tracep, code, error, rval[0]); 572 #endif 573 #ifdef DIAGNOSTICx 574 { extern int _udatasel, _ucodesel; 575 if (frame.sf_ss != _udatasel) 576 printf("ss %x call %d\n", frame.sf_ss, code); 577 if ((frame.sf_cs&0xffff) != _ucodesel) 578 printf("cs %x call %d\n", frame.sf_cs, code); 579 if (frame.sf_eip > VM_MAXUSER_ADDRESS) { 580 printf("eip %x call %d\n", frame.sf_eip, code); 581 frame.sf_eip = 0; 582 } 583 } 584 #endif 585 } 586