/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 * $Id: trap.c,v 1.50 1995/03/21 07:02:51 davidg Exp $
 */

/*
 * 386 Trap and System call handling
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/trap.h>
#include <machine/../isa/isa_device.h>

#include "isa.h"
#include "npx.h"

int	trap_pfault __P((struct trapframe *, int));
void	trap_fatal __P((struct trapframe *));

#define MAX_TRAP_MSG	27
char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

static inline void
userret(p, frame, oticks)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
{
	int sig, s;

	while ((sig = CURSIG(p)) != 0)
		postsig(sig);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but before we
		 * mi_switch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL) {
		u_quad_t ticks = p->p_sticks - oticks;

		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame->tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame->tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpriority = p->p_priority;
}

/*
 * trap(frame):
 *	Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 */

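/*
 * Note: although the handlers below declare `frame' as a pass-by-value
 * struct trapframe, it is effectively passed by reference (see the
 * similar remark at syscall()): `frame' names the trapframe that the
 * IDT entry stubs built on the kernel stack, so stores into
 * frame.tf_eip, frame.tf_eflags, etc. take effect when that frame is
 * restored on return.
 */
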
/*ARGSUSED*/
void
trap(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	u_quad_t sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef DIAGNOSTIC
	u_long eva;
#endif

	type = frame.tf_trapno;
	code = frame.tf_err;

	if (ISPL(frame.tf_cs) == SEL_UPL) {
		/* user trap */

		sticks = p->p_sticks;
		p->p_md.md_regs = (int *)&frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = type;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame.tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			astoff();
			cnt.v_soft++;
			if (p->p_flag & P_OWEUPC) {
				addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
				p->p_flag &= ~P_OWEUPC;
			}
			goto out;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
		case T_STKFLT:		/* stack fault */
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			ucode = code + BUS_SEGM_FAULT;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(&frame, TRUE);
			if (i == -1)
				return;
			if (i == 0)
				goto out;

			ucode = T_PAGEFLT;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV_TRAP;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			panic("NMI indicates hardware failure");
#endif

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF_TRAP;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_SUBRNG_TRAP;
			i = SIGFPE;
			break;

		case T_DNA:
#if NNPX > 0
			/* if a transparent fault (due to context switch "late") */
			if (npxdna())
				return;
#endif	/* NNPX > 0 */

#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE)
			i = math_emulate(&frame);
			if (i == 0) {
				if (!(frame.tf_eflags & PSL_T))
					return;
				frame.tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
#else	/* MATH_EMULATE || GPL_MATH_EMULATE */
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
#endif	/* MATH_EMULATE || GPL_MATH_EMULATE */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = T_FPOPFLT;
			i = SIGILL;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(&frame, FALSE);
			return;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)			\
	do {							\
		if (frame.tf_eip == (int)where) {		\
			frame.tf_eip = (int)whereto;		\
			return;					\
		}						\
	} while (0)

			if (intr_nesting_level == 0) {
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
			}
			if (curpcb && curpcb->pcb_onfault) {
				frame.tf_eip = (int)curpcb->pcb_onfault;
				return;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame.tf_eflags & PSL_NT) {
				frame.tf_eflags &= ~PSL_NT;
				return;
			}
			break;

#ifdef DDB
		case T_BPTFLT:
		case T_TRCTRAP:
			if (kdb_trap (type, 0, &frame))
				return;
			break;
#else
		case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
			/* Q: how do we turn it on again? */
			frame.tf_eflags &= ~PSL_T;
			return;
#endif

#if NISA > 0
		case T_NMI:
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			/* FALL THROUGH */
#endif
		}

		trap_fatal(&frame);
		return;
	}

	trapsignal(p, i, ucode);

#ifdef DEBUG
	eva = rcr2();
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%x", eva);
		uprintf("\n");
	}
#endif

out:
	userret(p, &frame, sticks);
}

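/*
 * trap_pfault() (both versions below) returns:
 *	-1	trap_fatal() was called; the fault could not be handled
 *		and the caller simply returns.
 *	 0	the fault was resolved (including fixups through
 *		curpcb->pcb_onfault); the interrupted context can resume.
 *	>0	a signal number (SIGBUS or SIGSEGV) for trap() to post
 *		to the current process.
 */
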
#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel.  The rest of the kernel needs to be made "safe"
 * before this can be used.  I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va < VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t v;
		vm_page_t ptepg;

		if ((p == NULL) ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		     curpcb->pcb_onfault == NULL)) {
			trap_fatal(frame);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 * fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		*(volatile char *)v += 0;

		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif

int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		vm_offset_t v = (vm_offset_t) vtopte(va);
		vm_page_t ptepg;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 * fault it first
		 */

		/* Fault the pte only if needed: */
		*(volatile char *)v += 0;

		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Since we know that kernel virtual addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

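/*
 * trap_fatal(frame):
 *	Dump the trap frame, the faulting address, the code segment
 *	descriptor and the current interrupt mask to the console, give a
 *	configured kernel debugger a chance to take over, and otherwise
 *	panic with the corresponding trap message.
 */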
void
trap_fatal(frame)
	struct trapframe *frame;
{
	int code, type, eva;
	struct soft_segment_descriptor softseg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	eva = rcr2();
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		printf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
	if (type == T_PAGEFLT) {
		printf("fault virtual address	= 0x%x\n", eva);
		printf("fault code		= %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer	= 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip);
	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
		softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
		softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran);
	printf("processor eflags	= ");
	if (frame->tf_eflags & PSL_T)
		printf("trace/trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process		= ");
	if (curproc) {
		printf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	printf("interrupt mask		= ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if (cpl == 0)
		printf("none");
	printf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if (kdb_trap (type, 0, frame))
		return;
#endif
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 */
int trapwrite(addr)
	unsigned addr;
{
	struct proc *p;
	vm_offset_t va, v;
	struct vmspace *vm;
	int rv;

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);

	p = curproc;
	vm = p->p_vmspace;

	++p->p_lock;

	if ((caddr_t)va >= vm->vm_maxsaddr
	    && (caddr_t)va < (caddr_t)USRSTACK) {
		if (!grow(p, va)) {
			--p->p_lock;
			return (1);
		}
	}

	v = trunc_page(vtopte(va));

	/*
	 * wire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
	}

	/*
	 * fault the data page
	 */
	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);

	/*
	 * unwire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
	}

	--p->p_lock;

	if (rv != KERN_SUCCESS)
		return 1;

	return (0);
}

/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
/*ARGSUSED*/
void
syscall(frame)
	struct trapframe frame;
{
	caddr_t params;
	int i;
	struct sysent *callp;
	struct proc *p = curproc;
	u_quad_t sticks;
	int error, opc;
	int args[8], rval[2];
	u_int code;

	sticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.tf_eax;
	p->p_md.md_regs = (int *)&frame;
	params = (caddr_t)frame.tf_esp + sizeof (int);

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 */
	opc = frame.tf_eip - 7;
	/*
	 * Need to check if this is a 32 bit or 64 bit syscall.
	 */
	if (code == SYS_syscall) {
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof (int);
	} else if (code == SYS___syscall) {
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
		params += sizeof(quad_t);
	}

	if (p->p_sysent->sv_mask)
		code = code & p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto bad;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;

	error = (*callp->sy_call)(p, args, rval);

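	/*
	 * Return protocol to user mode: on success the carry flag is
	 * cleared and %eax/%edx hold the result; on error the carry flag
	 * is set and %eax holds the errno value (translated through
	 * sv_errtbl for emulated ABIs).  ERESTART rewinds %eip to the
	 * lcall so the call is reissued; EJUSTRETURN leaves the frame
	 * exactly as the handler set it up.
	 */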
	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_edx = rval[1];
		frame.tf_eflags &= ~PSL_C;	/* carry bit */
		break;

	case ERESTART:
		frame.tf_eip = opc;
		break;

	case EJUSTRETURN:
		break;

	default:
	bad:
		if (p->p_sysent->sv_errsize)
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry bit */
		break;
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}

#ifdef COMPAT_LINUX
/*
 * linux_syscall(frame):
 */
/*ARGSUSED*/
void
linux_syscall(frame)
	struct trapframe frame;
{
	caddr_t params;
	int i;
	struct proc *p = curproc;
	struct sysent *callp;
	u_quad_t sticks;
	int error, opc;
	int rval[2];
	int code;
	struct linux_syscall_args {
		int ebx;
		int ecx;
		int edx;
		int esi;
		int edi;
		int ebp;
		int eax;
	} args;

	args.ebx = frame.tf_ebx;
	args.ecx = frame.tf_ecx;
	args.edx = frame.tf_edx;
	args.esi = frame.tf_esi;
	args.edi = frame.tf_edi;
	args.ebp = frame.tf_ebp;
	args.eax = frame.tf_eax;

	sticks = p->p_sticks;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("linux syscall");

	code = frame.tf_eax;
	p->p_md.md_regs = (int *)&frame;
	params = (caddr_t)frame.tf_esp + sizeof (int);

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 * THIS IS WRONG FOR LINUX XXX SOS
	 * SIZE OF INT 0x80 (2??) NEEDED HERE !!!
	 */
	opc = frame.tf_eip - 2;	/* was 7 */
	if (code == 0) {
		code = fuword(params);
		params += sizeof (int);
	}
	if (p->p_sysent->sv_mask)
		code = code & p->p_sysent->sv_mask;

	if (code < 0 || code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;

	error = (*callp->sy_call)(p, &args, rval);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = rval[0];
		frame.tf_eflags &= ~PSL_C;	/* carry bit */
		break;

	case ERESTART:
		frame.tf_eip = opc;
		break;

	case EJUSTRETURN:
		break;

	default:
	bad:
		if (p->p_sysent->sv_errsize)
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		frame.tf_eax = -error;
		frame.tf_eflags |= PSL_C;	/* carry bit */
		break;
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
#endif /* COMPAT_LINUX */