/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 * $FreeBSD$
 */

/*
 * 386 Trap and System call handling
 */

#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_clock.h"
#include "opt_trap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>

#include <i386/isa/intr_machdep.h>

#ifdef POWERFAIL_NMI
#include <sys/syslog.h>
#include <machine/clock.h>
#endif

#include <machine/vm86.h>

#include <ddb/ddb.h>

#include "isa.h"
#include "npx.h"

int (*pmath_emulate) __P((struct trapframe *));

extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));

static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
void dblfault_handler __P((void));

extern inthand_t IDTVEC(syscall);

#define MAX_TRAP_MSG		28
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};

static __inline int userret __P((struct proc *p, struct trapframe *frame,
				 u_quad_t oticks, int have_mplock));

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif
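
/*
 * Perform the work done on every return to user mode: deliver any
 * pending signals, honor a requested reschedule, and charge profiling
 * ticks.  Returns the updated MP lock ownership flag so the caller
 * knows whether it must release the lock.
 */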
static __inline int
userret(p, frame, oticks, have_mplock)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
	int have_mplock;
{
	int sig, s;

	while ((sig = CURSIG(p)) != 0) {
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		postsig(sig);
	}

	p->p_priority = p->p_usrpri;
	if (resched_wanted()) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but
		 * before we mi_switch()'ed, we might not be on the queue
		 * indicated by our priority.
		 */
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		s = splhigh();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL) {
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		addupc_task(p, frame->tf_eip,
			    (u_int)(p->p_sticks - oticks) * psratio);
	}
	curpriority = p->p_priority;
	return(have_mplock);
}

/*
 * Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 */

void
trap(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	u_quad_t sticks = 0;
	int i = 0, ucode = 0, type, code;
	vm_offset_t eva;

	if (!(frame.tf_eflags & PSL_I)) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame.tf_trapno;
		if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
			printf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		else if (type != T_BPTFLT && type != T_TRCTRAP)
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
		enable_intr();
	}

	eva = 0;
	if (frame.tf_trapno == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
		 * This problem is worked around by using an interrupt
		 * gate for the pagefault handler.  We are finally ready
		 * to read %cr2 and then must reenable interrupts.
		 *
		 * XXX this should be in the switch statement, but the
		 * NO_F00F_HACK and VM86 goto and ifdefs obfuscate the
		 * flow of control too much for this to be obviously
		 * correct.
		 */
		eva = rcr2();
		enable_intr();
	}

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame.tf_trapno;
	code = frame.tf_err;

	if (in_vm86call) {
		if (frame.tf_eflags & PSL_VM &&
		    (type == T_PROTFLT || type == T_STKFLT)) {
			i = vm86_emulate((struct vm86frame *)&frame);
			if (i != 0)
				/*
				 * returns to original process
				 */
				vm86_trap((struct vm86frame *)&frame);
			return;
		}
		switch (type) {
			/*
			 * these traps want either a process context, or
			 * assume a normal userspace trap.
			 */
		case T_PROTFLT:
		case T_SEGNPFLT:
			trap_fatal(&frame, eva);
			return;
		case T_TRCTRAP:
			type = T_BPTFLT;	/* kernel breakpoint */
			/* FALL THROUGH */
		}
		goto kernel_trap;	/* normal kernel trap handling */
	}

	if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
		/* user trap */

		sticks = p->p_sticks;
		p->p_md.md_regs = &frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = type;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame.tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			astoff();
			cnt.v_soft++;
			if (p->p_flag & P_OWEUPC) {
				p->p_flag &= ~P_OWEUPC;
				addupc_task(p, p->p_stats->p_prof.pr_addr,
					    p->p_stats->p_prof.pr_ticks);
			}
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame.tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)&frame);
				if (i == 0)
					goto out;
				break;
			}
			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			ucode = code + BUS_SEGM_FAULT;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(&frame, TRUE, eva);
			if (i == -1)
				return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2)
				goto restart;
#endif
			if (i == 0)
				goto out;

			ucode = T_PAGEFLT;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a
				 * pushbutton for debugging.
				 */
				printf ("NMI ... going to debugger\n");
				kdb_trap (type, 0, &frame);
#endif /* DDB */
				return;
			}
			panic("NMI indicates hardware failure");
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
#if NNPX > 0
			/* if a transparent fault (due to context switch "late") */
			if (npxdna())
				return;
#endif
			if (!pmath_emulate) {
				i = SIGFPE;
				ucode = FPE_FPU_NP_TRAP;
				break;
			}
			i = (*pmath_emulate)(&frame);
			if (i == 0) {
				if (!(frame.tf_eflags & PSL_T))
					return;
				frame.tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = T_FPOPFLT;
			i = SIGILL;
			break;
		}
	} else {
kernel_trap:
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(&frame, FALSE, eva);
			return;

		case T_DNA:
#if NNPX > 0
			/*
			 * The kernel is apparently using npx for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna())
				return;
#endif
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame.tf_eip == (int)where) {			\
			frame.tf_eip = (int)whereto;			\
			return;						\
		}							\
	} while (0)

			if (intr_nesting_level == 0) {
				/*
				 * Invalid %fs's and %gs's can be created using
				 * procfs or PT_SETREGS or by invalidating the
				 * underlying LDT entry.  This causes a fault
				 * in kernel mode when the kernel attempts to
				 * switch contexts.  Lose the bad context
				 * (XXX) so that we can continue, and generate
				 * a signal.
				 */
				if (frame.tf_eip == (int)cpu_switch_load_gs) {
					curpcb->pcb_gs = 0;
					psignal(p, SIGBUS);
					return;
				}
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
				MAYBE_DORETI_FAULT(doreti_popl_fs,
						   doreti_popl_fs_fault);
				if (curpcb && curpcb->pcb_onfault) {
					frame.tf_eip = (int)curpcb->pcb_onfault;
					return;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame.tf_eflags & PSL_NT) {
				frame.tf_eflags &= ~PSL_NT;
				return;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame.tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				return;
			}
			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame.tf_eflags &= ~PSL_T;
				return;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & 0xfffffff0);
				return;
			}
			/*
			 * Fall through (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef DDB
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
# define TIMER_FREQ	1193182
#endif
handle_powerfail:
		{
			static unsigned lastalert = 0;

			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(TIMER_FREQ/880, hz);
				lastalert = time_second;
			}
			return;
		}
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a
				 * pushbutton for debugging.
				 */
				printf ("NMI ... going to debugger\n");
				kdb_trap (type, 0, &frame);
#endif /* DDB */
				return;
			}
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}

		trap_fatal(&frame, eva);
		return;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(p, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(p, &frame, sticks, 1);
}

#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 */
static int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	va = trunc_page(eva);
	if (va < VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t v;
		vm_page_t mpte;

		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		     (intr_nesting_level != 0 || curpcb == NULL ||
		      curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame, eva);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary.  grow_stack returns false
		 * only if va falls into a growable stack region and the
		 * stack growth fails.  It returns true if va was not
		 * within a growable stack region, or if the stack growth
		 * succeeded.
		 */
		if (!grow_stack (p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			      (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
						      : VM_FAULT_NORMAL);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual addresses always have
		 * pte pages mapped, we just have to fault the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif
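
/*
 * Handle a page fault.  Returns 0 if the fault was resolved, -1 if it
 * was fatal (trap_fatal() has already been called), -2 if the Pentium
 * F00F workaround fired and the trap must be restarted, or the signal
 * number (SIGBUS/SIGSEGV) to deliver to a faulting user process.
 */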
int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	struct proc *p = curproc;

	va = trunc_page(eva);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 * An exception: if the faulting address is the invalid
		 * instruction entry in the IDT, then the Intel Pentium
		 * F00F bug workaround was triggered, and we need to
		 * treat it as an illegal instruction, and not a page
		 * fault.
		 */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
			frame->tf_trapno = T_PRIVINFLT;
			return -2;
		}
#endif
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary.  grow_stack returns false
		 * only if va falls into a growable stack region and the
		 * stack growth fails.  It returns true if va was not
		 * within a growable stack region, or if the stack growth
		 * succeeded.
		 */
		if (!grow_stack (p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			      (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
						      : VM_FAULT_NORMAL);

		--p->p_lock;
	} else {
		/*
		 * Don't have to worry about process locking or stacks
		 * in the kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
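
/*
 * Print a detailed diagnostic for an unrecoverable trap (trap type,
 * fault address, register and segment state, eflags, interrupt mask),
 * give the kernel debugger a chance to take over, and panic.
 */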
static void
trap_fatal(frame, eva)
	struct trapframe *frame;
	vm_offset_t eva;
{
	int code, type, ss, esp;
	struct soft_segment_descriptor softseg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		printf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			frame->tf_eflags & PSL_VM ? "vm86" :
			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	if (type == T_PAGEFLT) {
		printf("fault virtual address	= 0x%x\n", eva);
		printf("fault code		= %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer	= 0x%x:0x%x\n",
	       frame->tf_cs & 0xffff, frame->tf_eip);
	if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
		ss = frame->tf_ss & 0xffff;
		esp = frame->tf_esp;
	} else {
		ss = GSEL(GDATA_SEL, SEL_KPL);
		esp = (int)&frame->tf_esp;
	}
	printf("stack pointer		= 0x%x:0x%x\n", ss, esp);
	printf("frame pointer		= 0x%x:0x%x\n", ss, frame->tf_ebp);
	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
	       softseg.ssd_gran);
	printf("processor eflags	= ");
	if (frame->tf_eflags & PSL_T)
		printf("trace trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process		= ");
	if (curproc) {
		printf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	printf("interrupt mask		= ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if ((cpl & cam_imask) == cam_imask)
		printf("cam ");
	if (cpl == 0)
		printf("none");
#ifdef SMP
/**
 *  XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	printf(" <- SMP: XXX");
#endif
	printf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if ((debugger_on_panic || db_active) && kdb_trap(type, 0, frame))
		return;
#endif
	printf("trap number		= %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs.
 * This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred.  The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 */
int trapwrite(addr)
	unsigned addr;
{
	struct proc *p;
	vm_offset_t va;
	struct vmspace *vm;
	int rv;

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);

	p = curproc;
	vm = p->p_vmspace;

	++p->p_lock;

	if (!grow_stack (p, va)) {
		--p->p_lock;
		return (1);
	}

	/*
	 * fault the data page
	 */
	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);

	--p->p_lock;

	if (rv != KERN_SUCCESS)
		return 1;

	return (0);
}

/*
 *	syscall2 -	MP aware system call request C handler
 *
 *	A system call is essentially treated as a trap except that the
 *	MP lock is not held on entry or return.  We are responsible for
 *	obtaining the MP lock if necessary and for handling ASTs
 *	(e.g. a task switch) prior to return.
 *
 *	In general, only simple access and manipulation of curproc and
 *	the current stack is allowed without having to hold MP lock.
 */
void
syscall2(frame)
	struct trapframe frame;
{
	caddr_t params;
	int i;
	struct sysent *callp;
	struct proc *p = curproc;
	u_quad_t sticks;
	int error;
	int narg;
	int args[8];
	int have_mplock = 0;
	u_int code;

#ifdef DIAGNOSTIC
	if (ISPL(frame.tf_cs) != SEL_UPL) {
		get_mplock();
		panic("syscall");
		/* NOT REACHED */
	}
#endif

	/*
	 * Handle atomicity by looping, since interrupts are enabled and
	 * the MP lock is not held.
	 */
	sticks = ((volatile struct proc *)p)->p_sticks;
	while (sticks != ((volatile struct proc *)p)->p_sticks)
		sticks = ((volatile struct proc *)p)->p_sticks;

	p->p_md.md_regs = &frame;
	params = (caddr_t)frame.tf_esp + sizeof(int);
	code = frame.tf_eax;

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is not MP aware.
		 */
		get_mplock();
		(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
		rel_mplock();
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 * fuword is MP aware.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = fuword(params);
			params += sizeof(quad_t);
		}
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	/*
	 * copyin is MP aware, but the tracing code is not.
	 */
	if (params && (i = narg * sizeof(int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		get_mplock();
		have_mplock = 1;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, narg, args);
#endif
		goto bad;
	}

	/*
	 * Try to run the syscall without the MP lock if the syscall
	 * is MP safe.  We have to obtain the MP lock no matter what if
	 * we are ktracing.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();
		have_mplock = 1;
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL)) {
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		ktrsyscall(p->p_tracep, code, narg, args);
	}
#endif
	p->p_retval[0] = 0;
	p->p_retval[1] = frame.tf_edx;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	error = (*callp->sy_call)(p, args);

	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = p->p_retval[0];
		frame.tf_edx = p->p_retval[1];
		frame.tf_eflags &= ~PSL_C;
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes.  We saved this in tf_err.
		 */
		frame.tf_eip -= frame.tf_err;
		break;

	case EJUSTRETURN:
		break;

	default:
bad:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.  trapsignal() is not MP aware.
	 */
	if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues.
	 */
	have_mplock = userret(p, &frame, sticks, have_mplock);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET)) {
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	/*
	 * Release the MP lock if we had to get it.
	 */
	if (have_mplock)
		rel_mplock();
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  MP lock is held on entry and should be
 * held on return.
 */
void
fork_return(p, frame)
	struct proc *p;
	struct trapframe frame;
{
	frame.tf_eax = 0;		/* Child returns zero */
	frame.tf_eflags &= ~PSL_C;	/* success */
	frame.tf_edx = 1;		/* second return value: historically
					 * marks the child side of the fork */

	userret(p, &frame, 0, 1);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
#endif
}